41 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP 42 #define TPETRA_MATRIXMATRIX_DEF_HPP 45 #include "Teuchos_VerboseObject.hpp" 46 #include "Teuchos_Array.hpp" 48 #include "Tpetra_ConfigDefs.hpp" 49 #include "Tpetra_CrsMatrix.hpp" 51 #include "Tpetra_RowMatrixTransposer.hpp" 52 #include "Tpetra_ConfigDefs.hpp" 53 #include "Tpetra_Map.hpp" 54 #include "Tpetra_Export.hpp" 58 #include "Teuchos_FancyOStream.hpp" 61 #ifdef HAVE_KOKKOSKERNELS_EXPERIMENTAL 62 #include "KokkosKernels_SPGEMM.hpp" 73 namespace MatrixMatrix{
81 template <
class Scalar,
91 bool call_FillComplete_on_result,
92 const std::string& label,
93 const Teuchos::RCP<Teuchos::ParameterList>& params)
98 typedef LocalOrdinal LO;
99 typedef GlobalOrdinal GO;
107 #ifdef HAVE_TPETRA_MMM_TIMINGS 108 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
109 using Teuchos::TimeMonitor;
110 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Setup"))));
113 const std::string prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
118 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
119 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
124 RCP<const crs_matrix_type> Aprime = null;
128 RCP<const crs_matrix_type> Bprime = null;
138 const bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
140 bool use_optimized_ATB =
false;
141 if (transposeA && !transposeB && call_FillComplete_on_result && newFlag)
142 use_optimized_ATB =
true;
144 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later. 145 use_optimized_ATB =
false;
148 if (!use_optimized_ATB && transposeA) {
149 transposer_type transposer(rcpFromRef (A));
150 Aprime = transposer.createTranspose();
153 Aprime = rcpFromRef(A);
157 transposer_type transposer(rcpFromRef (B));
158 Bprime = transposer.createTranspose();
161 Bprime = rcpFromRef(B);
171 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
172 prefix <<
"ERROR, inner dimensions of op(A) and op(B) " 173 "must match for matrix-matrix product. op(A) is " 174 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
180 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
181 prefix <<
"ERROR, dimensions of result C must " 183 <<
" rows, should have at least " << Aouter << std::endl);
195 int numProcs = A.
getComm()->getSize();
199 crs_matrix_struct_type Aview;
200 crs_matrix_struct_type Bview;
202 RCP<const map_type> targetMap_A = Aprime->getRowMap();
203 RCP<const map_type> targetMap_B = Bprime->getRowMap();
205 #ifdef HAVE_TPETRA_MMM_TIMINGS 206 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All I&X"))));
212 if (!use_optimized_ATB) {
213 RCP<const import_type> dummyImporter;
214 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
220 targetMap_B = Aprime->getColMap();
223 if (!use_optimized_ATB)
224 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label, params);
226 #ifdef HAVE_TPETRA_MMM_TIMINGS 227 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Multiply"))));
231 if (use_optimized_ATB) {
232 MMdetails::mult_AT_B_newmatrix(A, B, C, label,params);
234 }
else if (call_FillComplete_on_result && newFlag) {
235 MMdetails::mult_A_B_newmatrix(Aview, Bview, C, label,params);
237 }
else if (call_FillComplete_on_result) {
238 MMdetails::mult_A_B_reuse(Aview, Bview, C, label,params);
244 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
246 MMdetails::mult_A_B(Aview, Bview, crsmat, label,params);
248 #ifdef HAVE_TPETRA_MMM_TIMINGS 249 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All FillComplete"))));
251 if (call_FillComplete_on_result) {
259 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
265 template <
class Scalar,
274 bool call_FillComplete_on_result,
275 const std::string& label,
276 const Teuchos::RCP<Teuchos::ParameterList>& params)
280 typedef LocalOrdinal LO;
281 typedef GlobalOrdinal GO;
288 #ifdef HAVE_TPETRA_MMM_TIMINGS 289 std::string prefix_mmm = std::string(
"TpetraExt ")+ label + std::string(
": ");
290 using Teuchos::TimeMonitor;
291 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm+std::string(
"Jacobi All Setup"))));
294 const std::string prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
299 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
300 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
302 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
303 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
312 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
313 prefix <<
"ERROR, inner dimensions of op(A) and op(B) " 314 "must match for matrix-matrix product. op(A) is " 315 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
321 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
322 prefix <<
"ERROR, dimensions of result C must " 324 <<
" rows, should have at least "<< Aouter << std::endl);
336 int numProcs = A.
getComm()->getSize();
340 crs_matrix_struct_type Aview;
341 crs_matrix_struct_type Bview;
343 RCP<const map_type> targetMap_A = Aprime->getRowMap();
344 RCP<const map_type> targetMap_B = Bprime->getRowMap();
346 #ifdef HAVE_TPETRA_MMM_TIMINGS 347 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All I&X"))));
352 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
353 if(!params.is_null()) importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
356 RCP<const import_type> dummyImporter;
357 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
false, label,importParams1);
362 targetMap_B = Aprime->getColMap();
367 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
368 if(!params.is_null()) importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
369 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label,importParams2);
371 #ifdef HAVE_TPETRA_MMM_TIMINGS 372 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All Multiply"))));
376 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
379 bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
381 if (call_FillComplete_on_result && newFlag) {
382 MMdetails::jacobi_A_B_newmatrix(omega, Dinv, Aview, Bview, C, label, params);
384 }
else if (call_FillComplete_on_result) {
385 MMdetails::jacobi_A_B_reuse(omega, Dinv, Aview, Bview, C, label, params);
388 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"jacobi_A_B_general not implemented");
411 template <
class Scalar,
422 using Teuchos::Array;
426 typedef LocalOrdinal LO;
427 typedef GlobalOrdinal GO;
432 const std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
434 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
435 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. " 436 "(Result matrix B is not required to be isFillComplete()).");
437 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete() , std::runtime_error,
438 prefix <<
"ERROR, input matrix B must not be fill complete!");
439 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph() , std::runtime_error,
440 prefix <<
"ERROR, input matrix B must not have static graph!");
442 prefix <<
"ERROR, input matrix B must not be locally indexed!");
444 prefix <<
"ERROR, input matrix B must have a dynamic profile!");
447 RCP<const crs_matrix_type> Aprime = null;
449 transposer_type transposer(rcpFromRef (A));
450 Aprime = transposer.createTranspose();
452 Aprime = rcpFromRef(A);
460 if (scalarB != Teuchos::ScalarTraits<SC>::one())
465 if (scalarA != Teuchos::ScalarTraits<SC>::zero()) {
466 for (LO i = 0; (size_t)i < numMyRows; ++i) {
467 row = B.
getRowMap()->getGlobalElement(i);
468 Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
470 if (scalarA != Teuchos::ScalarTraits<SC>::one())
471 for (
size_t j = 0; j < a_numEntries; ++j)
472 a_vals[j] *= scalarA;
483 template <
class Scalar,
487 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
489 const bool transposeA,
492 const bool transposeB,
496 const Teuchos::RCP<Teuchos::ParameterList>& params)
499 using Teuchos::rcpFromRef;
500 using Teuchos::rcp_implicit_cast;
501 using Teuchos::rcp_dynamic_cast;
504 typedef LocalOrdinal LO;
505 typedef GlobalOrdinal GO;
511 const std::string prefix =
"TpetraExt::MatrixMatrix::add(): ";
514 prefix <<
"A and B must both be fill complete.");
516 #ifdef HAVE_TPETRA_DEBUG 519 const bool domainMapsSame =
523 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
524 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
526 const bool rangeMapsSame =
530 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
531 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
533 #endif // HAVE_TPETRA_DEBUG 536 RCP<const crs_matrix_type> Aprime;
538 transposer_type transposer (rcpFromRef (A));
539 Aprime = transposer.createTranspose ();
542 Aprime = rcpFromRef (A);
545 #ifdef HAVE_TPETRA_DEBUG 546 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
547 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
548 #endif // HAVE_TPETRA_DEBUG 551 RCP<const crs_matrix_type> Bprime;
553 transposer_type transposer (rcpFromRef (B));
554 Bprime = transposer.createTranspose ();
557 Bprime = rcpFromRef (B);
560 #ifdef HAVE_TPETRA_DEBUG 561 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
562 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
564 TEUCHOS_TEST_FOR_EXCEPTION(
565 !Aprime->isFillComplete () || !Bprime->isFillComplete (), std::invalid_argument,
566 prefix <<
"Aprime and Bprime must both be fill complete. " 567 "Please report this bug to the Tpetra developers.");
568 #endif // HAVE_TPETRA_DEBUG 570 RCP<row_matrix_type> C =
571 Bprime->add (alpha, *rcp_implicit_cast<const row_matrix_type> (Aprime),
572 beta, domainMap, rangeMap, params);
574 return rcp_dynamic_cast<crs_matrix_type> (C);
578 template <
class Scalar,
592 using Teuchos::Array;
593 using Teuchos::ArrayRCP;
594 using Teuchos::ArrayView;
597 using Teuchos::rcp_dynamic_cast;
598 using Teuchos::rcpFromRef;
599 using Teuchos::tuple;
602 typedef Teuchos::ScalarTraits<Scalar> STS;
610 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
612 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
613 prefix <<
"The case C == null does not actually work. Fixing this will require an interface change.");
615 TEUCHOS_TEST_FOR_EXCEPTION(
617 prefix <<
"Both input matrices must be fill complete before calling this function.");
619 #ifdef HAVE_TPETRA_DEBUG 621 const bool domainMapsSame =
625 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
626 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
628 const bool rangeMapsSame =
632 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
633 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
635 #endif // HAVE_TPETRA_DEBUG 638 RCP<const crs_matrix_type> Aprime;
640 transposer_type theTransposer (rcpFromRef (A));
641 Aprime = theTransposer.createTranspose ();
643 Aprime = rcpFromRef (A);
646 #ifdef HAVE_TPETRA_DEBUG 647 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
648 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
649 #endif // HAVE_TPETRA_DEBUG 652 RCP<const crs_matrix_type> Bprime;
654 transposer_type theTransposer (rcpFromRef (B));
655 Bprime = theTransposer.createTranspose ();
657 Bprime = rcpFromRef (B);
660 #ifdef HAVE_TPETRA_DEBUG 661 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
662 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
663 #endif // HAVE_TPETRA_DEBUG 666 if (! C.is_null ()) {
667 C->setAllToScalar (STS::zero ());
675 if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
676 RCP<const map_type> rowMap = Aprime->getRowMap ();
678 RCP<const crs_graph_type> A_graph =
679 rcp_dynamic_cast<
const crs_graph_type> (Aprime->getGraph (),
true);
680 #ifdef HAVE_TPETRA_DEBUG 681 TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
682 "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. " 683 "Please report this bug to the Tpetra developers.");
684 #endif // HAVE_TPETRA_DEBUG 685 RCP<const crs_graph_type> B_graph =
686 rcp_dynamic_cast<
const crs_graph_type> (Bprime->getGraph (),
true);
687 #ifdef HAVE_TPETRA_DEBUG 688 TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
689 "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. " 690 "Please report this bug to the Tpetra developers.");
691 #endif // HAVE_TPETRA_DEBUG 692 RCP<const map_type> A_colMap = A_graph->getColMap ();
693 #ifdef HAVE_TPETRA_DEBUG 694 TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
695 "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. " 696 "Please report this bug to the Tpetra developers.");
697 #endif // HAVE_TPETRA_DEBUG 698 RCP<const map_type> B_colMap = B_graph->getColMap ();
699 #ifdef HAVE_TPETRA_DEBUG 700 TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
701 "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. " 702 "Please report this bug to the Tpetra developers.");
703 TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
705 "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. " 706 "Please report this bug to the Tpetra developers.");
707 TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
709 "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. " 710 "Please report this bug to the Tpetra developers.");
711 #endif // HAVE_TPETRA_DEBUG 714 RCP<const import_type> sumImport =
715 A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
716 RCP<const map_type> C_colMap = sumImport->getTargetMap ();
724 ArrayView<const LocalOrdinal> A_local_ind;
725 Array<GlobalOrdinal> A_global_ind;
726 ArrayView<const LocalOrdinal> B_local_ind;
727 Array<GlobalOrdinal> B_global_ind;
729 const size_t localNumRows = rowMap->getNodeNumElements ();
730 ArrayRCP<size_t> numEntriesPerRow (localNumRows);
733 size_t maxNumEntriesPerRow = 0;
734 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
736 A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
737 const size_type A_numEnt = A_local_ind.size ();
738 if (A_numEnt > A_global_ind.size ()) {
739 A_global_ind.resize (A_numEnt);
742 for (size_type k = 0; k < A_numEnt; ++k) {
743 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
747 B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
748 const size_type B_numEnt = B_local_ind.size ();
749 if (B_numEnt > B_global_ind.size ()) {
750 B_global_ind.resize (B_numEnt);
753 for (size_type k = 0; k < B_numEnt; ++k) {
754 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
758 const size_t curNumEntriesPerRow =
759 keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
760 B_global_ind.begin (), B_global_ind.end ());
761 numEntriesPerRow[localRow] = curNumEntriesPerRow;
762 maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
769 C = rcp (
new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow,
StaticProfile));
774 ArrayView<const Scalar> A_val;
775 ArrayView<const Scalar> B_val;
777 Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
778 Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
779 Array<Scalar> AplusB_val (maxNumEntriesPerRow);
781 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
783 Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
785 for (size_type k = 0; k < A_local_ind.size (); ++k) {
786 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
790 Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
792 for (size_type k = 0; k < B_local_ind.size (); ++k) {
793 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
796 const size_t curNumEntries = numEntriesPerRow[localRow];
797 ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
798 ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
799 ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
803 A_val.begin (), A_val.end (),
804 B_global_ind.begin (), B_global_ind.end (),
805 B_val.begin (), B_val.end (),
806 C_global_ind.begin (), C_val.begin (),
807 std::plus<Scalar> ());
809 for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
810 C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
813 C->replaceLocalValues (localRow, C_local_ind, C_val);
818 RCP<const map_type> domainMap = A_graph->getDomainMap ();
819 RCP<const map_type> rangeMap = A_graph->getRangeMap ();
820 C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
828 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), null));
835 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), 0));
839 #ifdef HAVE_TPETRA_DEBUG 840 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
841 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
842 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
843 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
844 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
845 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
846 #endif // HAVE_TPETRA_DEBUG 848 Array<RCP<const crs_matrix_type> > Mat =
849 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
850 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
853 for (
int k = 0; k < 2; ++k) {
854 Array<GlobalOrdinal> Indices;
855 Array<Scalar> Values;
863 #ifdef HAVE_TPETRA_DEBUG 864 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
865 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
866 #endif // HAVE_TPETRA_DEBUG 867 RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
868 #ifdef HAVE_TPETRA_DEBUG 869 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
870 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
871 #endif // HAVE_TPETRA_DEBUG 873 const size_t localNumRows = Mat[k]->getNodeNumRows ();
874 for (
size_t i = 0; i < localNumRows; ++i) {
875 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
876 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
877 if (numEntries > 0) {
878 Indices.resize (numEntries);
879 Values.resize (numEntries);
880 Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
882 if (scalar[k] != STS::one ()) {
883 for (
size_t j = 0; j < numEntries; ++j) {
884 Values[j] *= scalar[k];
888 if (C->isFillComplete ()) {
889 C->sumIntoGlobalValues (globalRow, Indices, Values);
891 C->insertGlobalValues (globalRow, Indices, Values);
904 template <
class TransferType>
905 void printMultiplicationStatistics(Teuchos::RCP<TransferType > Transfer,
const std::string &label) {
906 if (Transfer.is_null())
909 const Distributor & Distor = Transfer->getDistributor();
910 Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm();
912 size_t rows_send = Transfer->getNumExportIDs();
913 size_t rows_recv = Transfer->getNumRemoteIDs();
915 size_t round1_send = Transfer->getNumExportIDs() *
sizeof(size_t);
916 size_t round1_recv = Transfer->getNumRemoteIDs() *
sizeof(size_t);
919 size_t round2_send, round2_recv;
922 int myPID = Comm->getRank();
923 int NumProcs = Comm->getSize();
930 size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
931 size_t gstats_min[8], gstats_max[8];
933 double lstats_avg[8], gstats_avg[8];
934 for(
int i=0; i<8; i++)
935 lstats_avg[i] = ((
double)lstats[i])/NumProcs;
937 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
938 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
939 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
942 printf(
"%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
943 (int)gstats_min[0],gstats_avg[0],(
int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(
int)gstats_max[2],
944 (int)gstats_min[4],gstats_avg[4],(
int)gstats_max[4], (int)gstats_min[6],gstats_avg[6],(
int)gstats_max[6]);
945 printf(
"%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
946 (int)gstats_min[1],gstats_avg[1],(
int)gstats_max[1], (int)gstats_min[3],gstats_avg[3],(
int)gstats_max[3],
947 (int)gstats_min[5],gstats_avg[5],(
int)gstats_max[5], (int)gstats_min[7],gstats_avg[7],(
int)gstats_max[7]);
953 template<
class Scalar,
957 void mult_AT_B_newmatrix(
961 const std::string & label,
962 const Teuchos::RCP<Teuchos::ParameterList>& params)
968 typedef LocalOrdinal LO;
969 typedef GlobalOrdinal GO;
974 #ifdef HAVE_TPETRA_MMM_TIMINGS 975 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
976 using Teuchos::TimeMonitor;
977 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T Transpose"))));
983 transposer_type transposer (rcpFromRef (A),label+std::string(
"XP: "));
984 RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
985 if(!params.is_null()) transposeParams->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
986 RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Atrans = transposer.createTransposeLocal(transposeParams);
991 #ifdef HAVE_TPETRA_MMM_TIMINGS 992 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T I&X"))));
996 crs_matrix_struct_type Aview;
997 crs_matrix_struct_type Bview;
998 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
1001 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
1002 if(!params.is_null()) importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1003 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,
true, label,importParams1);
1005 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
1006 if(!params.is_null()) importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1007 MMdetails::import_and_extract_views(B, B.
getRowMap(), Bview, dummyImporter,
true, label,importParams2);
1009 #ifdef HAVE_TPETRA_MMM_TIMINGS 1010 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T AB-core"))));
1013 RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >Ctemp;
1016 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
1017 if (needs_final_export)
1020 Ctemp = rcp(&C,
false);
1023 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label,params);
1028 #ifdef HAVE_TPETRA_MMM_TIMINGS 1029 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T exportAndFillComplete"))));
1032 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Crcp(&C,
false);
1033 if (needs_final_export) {
1034 Teuchos::ParameterList labelList;
1035 labelList.set(
"Timer Label", label);
1036 if(!params.is_null()) labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
1038 Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(),
1041 #ifdef HAVE_TPETRA_MMM_STATISTICS 1042 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1047 template<
class Scalar,
1049 class GlobalOrdinal,
1054 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1055 const std::string& label,
1056 const Teuchos::RCP<Teuchos::ParameterList>& params)
1058 using Teuchos::Array;
1059 using Teuchos::ArrayRCP;
1060 using Teuchos::ArrayView;
1061 using Teuchos::OrdinalTraits;
1062 using Teuchos::null;
1064 typedef Teuchos::ScalarTraits<Scalar> STS;
1066 LocalOrdinal C_firstCol = Bview.
colMap->getMinLocalIndex();
1067 LocalOrdinal C_lastCol = Bview.
colMap->getMaxLocalIndex();
1069 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1070 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1072 ArrayView<const GlobalOrdinal> bcols = Bview.
colMap->getNodeElementList();
1073 ArrayView<const GlobalOrdinal> bcols_import = null;
1075 C_firstCol_import = Bview.
importColMap->getMinLocalIndex();
1076 C_lastCol_import = Bview.
importColMap->getMaxLocalIndex();
1078 bcols_import = Bview.
importColMap->getNodeElementList();
1081 size_t C_numCols = C_lastCol - C_firstCol +
1082 OrdinalTraits<LocalOrdinal>::one();
1083 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1084 OrdinalTraits<LocalOrdinal>::one();
1086 if (C_numCols_import > C_numCols)
1087 C_numCols = C_numCols_import;
1089 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1090 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1091 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1093 Array<Scalar> C_row_i = dwork;
1094 Array<GlobalOrdinal> C_cols = iwork;
1095 Array<size_t> c_index = iwork2;
1096 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1097 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1099 size_t C_row_i_length, j, k, last_index;
1102 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1103 Array<LocalOrdinal> Acol2Brow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1104 Array<LocalOrdinal> Acol2Irow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1107 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <=
1108 Aview.
colMap->getMaxLocalIndex(); i++)
1113 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <=
1114 Aview.
colMap->getMaxLocalIndex(); i++) {
1115 GlobalOrdinal GID = Aview.
colMap->getGlobalElement(i);
1116 LocalOrdinal BLID = Bview.
origMatrix->getRowMap()->getLocalElement(GID);
1117 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1118 else Acol2Irow[i] = Bview.
importMatrix->getRowMap()->getLocalElement(GID);
1128 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1129 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1130 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1131 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1132 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1133 ArrayView<const Scalar> Avals, Bvals, Ivals;
1134 Aview.
origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1135 Bview.
origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1136 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1137 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1139 Bview.
importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1140 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1143 bool C_filled = C.isFillComplete();
1145 for (
size_t i = 0; i < C_numCols; i++)
1146 c_index[i] = OrdinalTraits<size_t>::invalid();
1149 size_t Arows = Aview.
rowMap->getNodeNumElements();
1150 for(
size_t i=0; i<Arows; ++i) {
1154 GlobalOrdinal global_row = Aview.
rowMap->getGlobalElement(i);
1160 C_row_i_length = OrdinalTraits<size_t>::zero();
1162 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1163 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1164 const Scalar Aval = Avals[k];
1165 if (Aval == STS::zero())
1168 if (Ak == LO_INVALID)
1171 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1172 LocalOrdinal col = Bcolind[j];
1175 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1178 C_row_i[C_row_i_length] = Aval*Bvals[j];
1179 C_cols[C_row_i_length] = col;
1180 c_index[col] = C_row_i_length;
1184 C_row_i[c_index[col]] += Aval*Bvals[j];
1189 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1190 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1191 C_cols[ii] = bcols[C_cols[ii]];
1192 combined_index[ii] = C_cols[ii];
1193 combined_values[ii] = C_row_i[ii];
1195 last_index = C_row_i_length;
1201 C_row_i_length = OrdinalTraits<size_t>::zero();
1203 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1204 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1205 const Scalar Aval = Avals[k];
1206 if (Aval == STS::zero())
1209 if (Ak!=LO_INVALID)
continue;
1211 Ak = Acol2Irow[Acolind[k]];
1212 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1213 LocalOrdinal col = Icolind[j];
1216 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1219 C_row_i[C_row_i_length] = Aval*Ivals[j];
1220 C_cols[C_row_i_length] = col;
1221 c_index[col] = C_row_i_length;
1226 C_row_i[c_index[col]] += Aval*Ivals[j];
1231 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1232 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1233 C_cols[ii] = bcols_import[C_cols[ii]];
1234 combined_index[last_index] = C_cols[ii];
1235 combined_values[last_index] = C_row_i[ii];
1242 C.sumIntoGlobalValues(
1244 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1245 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1247 C.insertGlobalValues(
1249 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1250 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1255 template<
class Scalar,
1257 class GlobalOrdinal,
1260 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1261 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1263 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1264 Mview.maxNumRowEntries = Mview.indices[0].size();
1266 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1267 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1268 Mview.maxNumRowEntries = Mview.indices[i].size();
1273 template<
class CrsMatrixType>
1274 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1276 size_t Aest = 100, Best=100;
1277 if (A.getNodeNumEntries() > 0)
1278 Aest = (A.getNodeNumRows() > 0)? A.getNodeNumEntries()/A.getNodeNumRows() : 100;
1279 if (B.getNodeNumEntries() > 0)
1280 Best = (B.getNodeNumRows() > 0) ? B.getNodeNumEntries()/B.getNodeNumRows() : 100;
1282 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1283 nnzperrow *= nnzperrow;
1285 return (
size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
1294 template<
class Scalar,
1296 class GlobalOrdinal,
1298 void mult_A_B_newmatrix(
1302 const std::string& label,
1303 const Teuchos::RCP<Teuchos::ParameterList>& params)
1305 using Teuchos::Array;
1306 using Teuchos::ArrayRCP;
1307 using Teuchos::ArrayView;
1312 typedef LocalOrdinal LO;
1313 typedef GlobalOrdinal GO;
1319 #ifdef HAVE_TPETRA_MMM_TIMINGS 1320 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1321 using Teuchos::TimeMonitor;
1322 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM M5 Cmap")))));
1324 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1327 RCP<const import_type> Cimport;
1328 RCP<const map_type> Ccolmap;
1329 RCP<const import_type> Bimport = Bview.
origMatrix->getGraph()->getImporter();
1330 RCP<const import_type> Iimport = Bview.
importMatrix.is_null() ?
1331 Teuchos::null : Bview.
importMatrix->getGraph()->getImporter();
1338 Array<LO> Bcol2Ccol(Bview.
colMap->getNodeNumElements()), Icol2Ccol;
1345 for (
size_t i = 0; i < Bview.
colMap->getNodeNumElements(); i++)
1346 Bcol2Ccol[i] = Teuchos::as<LO>(i);
1357 if (!Bimport.is_null() && !Iimport.is_null())
1358 Cimport = Bimport->setUnion(*Iimport);
1360 else if (!Bimport.is_null() && Iimport.is_null())
1361 Cimport = Bimport->setUnion();
1363 else if (Bimport.is_null() && !Iimport.is_null())
1364 Cimport = Iimport->setUnion();
1367 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1369 Ccolmap = Cimport->getTargetMap();
1374 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()),
1375 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1382 Icol2Ccol.resize(Bview.
importMatrix->getColMap()->getNodeNumElements());
1383 ArrayView<const GO> Bgid = Bview.
origMatrix->getColMap()->getNodeElementList();
1384 ArrayView<const GO> Igid = Bview.
importMatrix->getColMap()->getNodeElementList();
1386 for (
size_t i = 0; i < Bview.
origMatrix->getColMap()->getNodeNumElements(); i++)
1387 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1388 for (
size_t i = 0; i < Bview.
importMatrix->getColMap()->getNodeNumElements(); i++)
1389 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1414 Array<LO> targetMapToOrigRow (Aview.
colMap->getNodeNumElements(), LO_INVALID);
1415 Array<LO> targetMapToImportRow(Aview.
colMap->getNodeNumElements(), LO_INVALID);
1417 for (LO i = Aview.
colMap->getMinLocalIndex(); i <= Aview.
colMap->getMaxLocalIndex(); i++) {
1418 LO B_LID = Bview.
origMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1419 if (B_LID != LO_INVALID) {
1420 targetMapToOrigRow[i] = B_LID;
1422 LO I_LID = Bview.
importMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1423 targetMapToImportRow[i] = I_LID;
1431 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node>::mult_A_B_newmatrix_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
1440 template<
class Scalar,
1442 class GlobalOrdinal,
1446 const Teuchos::Array<LocalOrdinal> & targetMapToOrigRow,
1447 const Teuchos::Array<LocalOrdinal> & targetMapToImportRow,
1448 const Teuchos::Array<LocalOrdinal> & Bcol2Ccol,
1449 const Teuchos::Array<LocalOrdinal> & Icol2Ccol,
1452 const std::string& label,
1453 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1454 #ifdef HAVE_TPETRA_MMM_TIMINGS 1455 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1456 using Teuchos::TimeMonitor;
1457 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore"))));
1460 using Teuchos::Array;
1461 using Teuchos::ArrayRCP;
1462 using Teuchos::ArrayView;
1467 typedef LocalOrdinal LO;
1468 typedef GlobalOrdinal GO;
1471 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1472 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1473 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1476 RCP<const map_type> Ccolmap = C.
getColMap();
1477 size_t m = Aview.
origMatrix->getNodeNumRows();
1478 size_t n = Ccolmap->getNodeNumElements();
1481 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1482 ArrayRCP<size_t> Crowptr_RCP;
1483 ArrayRCP<const LO> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1484 ArrayRCP<LO> Ccolind_RCP;
1485 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1486 ArrayRCP<SC> Cvals_RCP;
1493 Aview.
origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
1494 Bview.
origMatrix->getAllValues(Browptr_RCP, Bcolind_RCP, Bvals_RCP);
1496 Bview.
importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
1503 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1504 ArrayView<const LO> Acolind, Bcolind, Icolind;
1505 ArrayView<const SC> Avals, Bvals, Ivals;
1506 ArrayView<size_t> Crowptr;
1507 ArrayView<LO> Ccolind;
1508 ArrayView<SC> Cvals;
1509 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1510 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1512 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1523 Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP();
1524 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1525 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1536 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
1537 Array<size_t> c_status(n, ST_INVALID);
1547 size_t CSR_ip = 0, OLD_ip = 0;
1548 for (
size_t i = 0; i < m; i++) {
1551 Crowptr[i] = CSR_ip;
1554 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
1555 LO Aik = Acolind[k];
1556 const SC Aval = Avals[k];
1557 if (Aval == SC_ZERO)
1560 if (targetMapToOrigRow[Aik] != LO_INVALID) {
1567 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
1570 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
1571 LO Bkj = Bcolind[j];
1572 LO Cij = Bcol2Ccol[Bkj];
1574 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
1576 c_status[Cij] = CSR_ip;
1577 Ccolind[CSR_ip] = Cij;
1578 Cvals[CSR_ip] = Aval*Bvals[j];
1582 Cvals[c_status[Cij]] += Aval*Bvals[j];
1593 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
1594 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
1595 LO Ikj = Icolind[j];
1596 LO Cij = Icol2Ccol[Ikj];
1598 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
1600 c_status[Cij] = CSR_ip;
1601 Ccolind[CSR_ip] = Cij;
1602 Cvals[CSR_ip] = Aval*Ivals[j];
1606 Cvals[c_status[Cij]] += Aval*Ivals[j];
1613 if (CSR_ip + n > CSR_alloc) {
1615 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1616 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1621 Crowptr[m] = CSR_ip;
1624 Cvals_RCP .resize(CSR_ip);
1625 Ccolind_RCP.resize(CSR_ip);
1627 #ifdef HAVE_TPETRA_MMM_TIMINGS 1628 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix Final Sort"))));
1641 Import_Util::sortCrsEntries(Crowptr_RCP(), Ccolind_RCP(), Cvals_RCP());
1645 #ifdef HAVE_TPETRA_MMM_TIMINGS 1646 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix ESFC"))));
1657 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
1658 labelList->set(
"Timer Label",label);
1659 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
1660 RCP<const Export<LO,GO,NO> > dummyExport;
1661 C.
expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
1668 #if defined(HAVE_KOKKOSKERNELS_EXPERIMENTAL) && defined (HAVE_TPETRA_INST_OPENMP) 1669 template<
class Scalar,
1671 class GlobalOrdinal>
1672 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> {
1675 const Teuchos::Array<LocalOrdinal> & Acol2Brow,
1676 const Teuchos::Array<LocalOrdinal> & Acol2Irow,
1677 const Teuchos::Array<LocalOrdinal> & Bcol2Ccol,
1678 const Teuchos::Array<LocalOrdinal> & Icol2Ccol,
1681 const std::string& label = std::string(),
1682 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
1688 template<
class Scalar,
1690 class GlobalOrdinal>
1693 const Teuchos::Array<LocalOrdinal> & Acol2Brow,
1694 const Teuchos::Array<LocalOrdinal> & Acol2Irow,
1695 const Teuchos::Array<LocalOrdinal> & Bcol2Ccol,
1696 const Teuchos::Array<LocalOrdinal> & Icol2Ccol,
1699 const std::string& label,
1700 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1702 #ifdef HAVE_TPETRA_MMM_TIMINGS 1703 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1704 using Teuchos::TimeMonitor;
1705 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPWrapper")))));
1710 typedef Kokkos::Compat::KokkosOpenMPWrapperNode Node;
1711 std::string nodename(
"OpenMP");
1716 typedef typename KCRS::device_type device_t;
1717 typedef typename KCRS::StaticCrsGraphType graph_t;
1718 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1719 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1720 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1724 int team_work_size = 16;
1725 std::string myalg(
"SPGEMM_KK_MEMORY");
1726 if(!params.is_null()) {
1727 if(params->isParameter(
"openmp: algorithm"))
1728 myalg = params->get(
"openmp: algorithm",myalg);
1729 if(params->isParameter(
"openmp: team work size"))
1730 team_work_size = params->get(
"openmp: team work size",team_work_size);
1734 typedef KokkosKernels::Experimental::KokkosKernelsHandle<lno_view_t,lno_nnz_view_t, scalar_view_t, typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
1737 const KCRS & Ak = Aview.
origMatrix->getLocalMatrix();
1738 const KCRS & Bk = Bview.
origMatrix->getLocalMatrix();
1739 RCP<const KCRS> Bmerged;
1742 std::string alg = nodename+std::string(
" algorithm");
1744 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
1745 KokkosKernels::Experimental::Graph::SPGEMMAlgorithm alg_enum = KokkosKernels::Experimental::Graph::StringToSPGEMMAlgorithm(myalg);
1755 size_t merge_numrows = Ak.numCols();
1756 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
1758 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1761 const LocalOrdinal * Acol2Brow_ptr = Acol2Brow.getRawPtr();
1762 const LocalOrdinal * Acol2Irow_ptr = Acol2Irow.getRawPtr();
1763 const LocalOrdinal * Bcol2Ccol_ptr = Bcol2Ccol.getRawPtr();
1764 const LocalOrdinal * Icol2Ccol_ptr = Icol2Ccol.getRawPtr();
1767 typedef Node::execution_space execution_space;
1768 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1769 Kokkos::parallel_scan (range_type (0, merge_numrows),
1770 KOKKOS_LAMBDA (
const size_t i,
size_t& update,
const bool final) {
1771 if(
final) Mrowptr(i) = update;
1775 if(Acol2Brow_ptr[i]!=LO_INVALID)
1776 ct = Bk.graph.row_map(Acol2Brow_ptr[i]+1) - Bk.graph.row_map(Acol2Brow_ptr[i]);
1778 ct = Ik->graph.row_map(Acol2Irow_ptr[i]+1) - Ik->graph.row_map(Acol2Irow_ptr[i]);
1781 if(
final && i+1==merge_numrows)
1782 Mrowptr(i+1)=update;
1786 size_t merge_nnz = Mrowptr(merge_numrows);
1787 lno_nnz_view_t Mcolind(
"Mcolind",merge_nnz);
1788 scalar_view_t Mvalues(
"Mvals",merge_nnz);
1791 typedef Node::execution_space execution_space;
1792 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1793 Kokkos::parallel_for (range_type (0, merge_numrows),
1794 KOKKOS_LAMBDA (
const size_t i) {
1795 if(Acol2Brow_ptr[i]!=LO_INVALID) {
1796 size_t row = Acol2Brow_ptr[i];
1797 size_t start = Bk.graph.row_map(row);
1798 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
1799 Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
1800 Mcolind(j) = Bcol2Ccol_ptr[Bk.graph.entries(j-Mrowptr(i)+start)];
1804 size_t row = Acol2Irow_ptr[i];
1805 size_t start = Ik->graph.row_map(row);
1806 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
1807 Mvalues(j) = Ik->values(j-Mrowptr(i)+start);
1808 Mcolind(j) = Icol2Ccol_ptr[Ik->graph.entries(j-Mrowptr(i)+start)];
1813 Bmerged = Teuchos::rcp(
new KCRS(
"CrsMatrix",merge_numrows,C.
getColMap()->getNodeNumElements(),merge_nnz,Mvalues,Mrowptr,Mcolind));
1818 Bmerged = Teuchos::rcpFromRef(Bk);
1821 #ifdef HAVE_TPETRA_MMM_TIMINGS 1822 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPCore"))));
1826 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
1827 typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
1828 typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
1830 lno_view_t row_mapC (
"non_const_lnow_row", AnumRows + 1);
1831 lno_nnz_view_t entriesC;
1832 scalar_view_t valuesC;
1834 kh.create_spgemm_handle(alg_enum);
1835 kh.set_team_work_size(team_work_size);
1837 KokkosKernels::Experimental::Graph::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,
false,Bmerged->graph.row_map,Bmerged->graph.entries,
false,row_mapC);
1839 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
1841 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
1842 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
1844 KokkosKernels::Experimental::Graph::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,
false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,
false,row_mapC,entriesC,valuesC);
1845 kh.destroy_spgemm_handle();
1847 #ifdef HAVE_TPETRA_MMM_TIMINGS 1848 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPSort"))));
1852 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
1855 #ifdef HAVE_TPETRA_MMM_TIMINGS 1856 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPESFC"))));
1860 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
1861 labelList->set(
"Timer Label",label);
1862 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
1867 Teuchos::ArrayRCP< const size_t > Crowptr;
1868 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
1869 Teuchos::ArrayRCP< const Scalar > Cvalues;
1870 C.getAllValues(Crowptr,Ccolind,Cvalues);
1873 int MyPID = C->
getComm()->getRank();
1874 printf(
"[%d] Crowptr = ",MyPID);
1875 for(
size_t i=0; i<(size_t) Crowptr.size(); i++) {
1876 printf(
"%3d ",(
int)Crowptr.getConst()[i]);
1879 printf(
"[%d] Ccolind = ",MyPID);
1880 for(
size_t i=0; i<(size_t)Ccolind.size(); i++) {
1881 printf(
"%3d ",(
int)Ccolind.getConst()[i]);
1898 template<
class Scalar,
1900 class GlobalOrdinal,
1902 void mult_A_B_reuse(
1906 const std::string& label,
1907 const Teuchos::RCP<Teuchos::ParameterList>& params)
1909 using Teuchos::Array;
1910 using Teuchos::ArrayRCP;
1911 using Teuchos::ArrayView;
1916 typedef LocalOrdinal LO;
1917 typedef GlobalOrdinal GO;
1923 #ifdef HAVE_TPETRA_MMM_TIMINGS 1924 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1925 using Teuchos::TimeMonitor;
1926 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse Cmap"))));
1928 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1929 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1932 RCP<const import_type> Cimport = C.
getGraph()->getImporter();
1933 RCP<const map_type> Ccolmap = C.
getColMap();
1935 Array<LO> Bcol2Ccol(Bview.
colMap->getNodeNumElements()), Icol2Ccol;
1939 ArrayView<const GO> Bgid = Bview.
origMatrix->getColMap()->getNodeElementList();
1940 for (
size_t i = 0; i < Bview.
origMatrix->getColMap()->getNodeNumElements(); i++)
1941 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1944 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()),
1945 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1947 Icol2Ccol.resize(Bview.
importMatrix->getColMap()->getNodeNumElements());
1948 ArrayView<const GO> Igid = Bview.
importMatrix->getColMap()->getNodeElementList();
1949 for (
size_t i = 0; i < Bview.
importMatrix->getColMap()->getNodeNumElements(); i++)
1950 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1954 #ifdef HAVE_TPETRA_MMM_TIMINGS 1955 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
1959 size_t m = Aview.
origMatrix->getNodeNumRows();
1960 size_t n = Ccolmap->getNodeNumElements();
1963 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP, Crowptr_RCP;
1964 ArrayRCP<const LO> Acolind_RCP, Bcolind_RCP, Icolind_RCP, Ccolind_RCP;
1965 ArrayRCP<const SC> Avals_RCP, Bvals_RCP, Ivals_RCP, Cvals_RCP;
1967 Aview.
origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
1968 Bview.
origMatrix->getAllValues(Browptr_RCP, Bcolind_RCP, Bvals_RCP);
1970 Bview.
importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
1971 C.getAllValues(Crowptr_RCP, Ccolind_RCP, Cvals_RCP);
1974 ArrayView<const size_t> Arowptr, Browptr, Irowptr, Crowptr;
1975 ArrayView<const LO> Acolind, Bcolind, Icolind, Ccolind;
1976 ArrayView<const SC> Avals, Bvals, Ivals;
1977 ArrayView<SC> Cvals;
1978 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1979 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1981 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1983 Crowptr = Crowptr_RCP(); Ccolind = Ccolind_RCP(); Cvals = (Teuchos::arcp_const_cast<SC>(Cvals_RCP))();
1986 Array<LO> targetMapToOrigRow (Aview.
colMap->getNodeNumElements(), LO_INVALID);
1987 Array<LO> targetMapToImportRow(Aview.
colMap->getNodeNumElements(), LO_INVALID);
1989 for (LO i = Aview.
colMap->getMinLocalIndex(); i <= Aview.
colMap->getMaxLocalIndex(); i++) {
1990 LO B_LID = Bview.
origMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1991 if (B_LID != LO_INVALID) {
1992 targetMapToOrigRow[i] = B_LID;
1994 LO I_LID = Bview.
importMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1995 targetMapToImportRow[i] = I_LID;
1999 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2006 Array<size_t> c_status(n, ST_INVALID);
2009 size_t CSR_ip = 0, OLD_ip = 0;
2010 for (
size_t i = 0; i < m; i++) {
2014 OLD_ip = Crowptr[i];
2015 CSR_ip = Crowptr[i+1];
2016 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2017 c_status[Ccolind[k]] = k;
2023 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2024 LO Aik = Acolind[k];
2025 const SC Aval = Avals[k];
2026 if (Aval == SC_ZERO)
2029 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2031 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
2033 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2034 LO Bkj = Bcolind[j];
2035 LO Cij = Bcol2Ccol[Bkj];
2037 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2038 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2039 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2041 Cvals[c_status[Cij]] += Aval * Bvals[j];
2046 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
2047 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2048 LO Ikj = Icolind[j];
2049 LO Cij = Icol2Ccol[Ikj];
2051 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2052 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2053 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2055 Cvals[c_status[Cij]] += Aval * Ivals[j];
2066 template<
class Scalar,
2068 class GlobalOrdinal,
2070 void jacobi_A_B_newmatrix(
2076 const std::string& label,
2077 const Teuchos::RCP<Teuchos::ParameterList>& params)
2079 using Teuchos::Array;
2080 using Teuchos::ArrayRCP;
2081 using Teuchos::ArrayView;
2086 typedef LocalOrdinal LO;
2087 typedef GlobalOrdinal GO;
2093 #ifdef HAVE_TPETRA_MMM_TIMINGS 2094 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2095 using Teuchos::TimeMonitor;
2096 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi M5 Cmap"))));
2098 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2099 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2103 RCP<const import_type> Cimport;
2104 RCP<const map_type> Ccolmap;
2105 RCP<const import_type> Bimport = Bview.
origMatrix->getGraph()->getImporter();
2106 RCP<const import_type> Iimport = Bview.
importMatrix.is_null() ? Teuchos::null : Bview.
importMatrix->getGraph()->getImporter();
2113 Array<LO> Bcol2Ccol(Bview.
colMap->getNodeNumElements()), Icol2Ccol;
2120 for (
size_t i = 0; i < Bview.
colMap->getNodeNumElements(); i++)
2121 Bcol2Ccol[i] = Teuchos::as<LO>(i);
2132 if (!Bimport.is_null() && !Iimport.is_null()){
2133 Cimport = Bimport->setUnion(*Iimport);
2134 Ccolmap = Cimport->getTargetMap();
2136 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2137 Cimport = Bimport->setUnion();
2139 }
else if(Bimport.is_null() && !Iimport.is_null()) {
2140 Cimport = Iimport->setUnion();
2143 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2145 Ccolmap = Cimport->getTargetMap();
2147 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()),
2148 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2155 Icol2Ccol.resize(Bview.
importMatrix->getColMap()->getNodeNumElements());
2156 ArrayView<const GO> Bgid = Bview.
origMatrix->getColMap()->getNodeElementList();
2157 ArrayView<const GO> Igid = Bview.
importMatrix->getColMap()->getNodeElementList();
2159 for (
size_t i = 0; i < Bview.
origMatrix->getColMap()->getNodeNumElements(); i++)
2160 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
2161 for (
size_t i = 0; i < Bview.
importMatrix->getColMap()->getNodeNumElements(); i++)
2162 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
2165 #ifdef HAVE_TPETRA_MMM_TIMINGS 2166 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SerialCore"))));
2170 size_t m = Aview.
origMatrix->getNodeNumRows();
2171 size_t n = Ccolmap->getNodeNumElements();
2174 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
2175 ArrayRCP<size_t> Crowptr_RCP;
2176 ArrayRCP<const LO> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
2177 ArrayRCP<LO> Ccolind_RCP;
2178 ArrayRCP<const SC> Avals_RCP, Bvals_RCP, Ivals_RCP;
2179 ArrayRCP<SC> Cvals_RCP;
2180 ArrayRCP<const SC> Dvals_RCP;
2187 Aview.
origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
2188 Bview.
origMatrix->getAllValues(Browptr_RCP, Bcolind_RCP, Bvals_RCP);
2190 Bview.
importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
2201 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
2202 ArrayView<const LO> Acolind, Bcolind, Icolind;
2203 ArrayView<const SC> Avals, Bvals, Ivals;
2204 ArrayView<size_t> Crowptr;
2205 ArrayView<LO> Ccolind;
2206 ArrayView<SC> Cvals;
2207 ArrayView<const SC> Dvals;
2208 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
2209 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
2211 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
2213 Dvals = Dvals_RCP();
2220 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2221 Array<size_t> c_status(n, ST_INVALID);
2231 size_t CSR_ip = 0, OLD_ip = 0;
2232 Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP();
2233 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
2234 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
2252 Array<LO> targetMapToOrigRow (Aview.
colMap->getNodeNumElements(), LO_INVALID);
2253 Array<LO> targetMapToImportRow(Aview.
colMap->getNodeNumElements(), LO_INVALID);
2255 for (LO i = Aview.
colMap->getMinLocalIndex(); i <= Aview.
colMap->getMaxLocalIndex(); i++) {
2256 LO B_LID = Bview.
origMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
2257 if (B_LID != LO_INVALID) {
2258 targetMapToOrigRow[i] = B_LID;
2260 LO I_LID = Bview.
importMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
2261 targetMapToImportRow[i] = I_LID;
2265 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2279 for (
size_t i = 0; i < m; i++) {
2282 Crowptr[i] = CSR_ip;
2286 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2287 Scalar Bval = Bvals[j];
2288 if (Bval == SC_ZERO)
2290 LO Bij = Bcolind[j];
2291 LO Cij = Bcol2Ccol[Bij];
2294 c_status[Cij] = CSR_ip;
2295 Ccolind[CSR_ip] = Cij;
2296 Cvals[CSR_ip] = Bvals[j];
2301 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2302 LO Aik = Acolind[k];
2303 const SC Aval = Avals[k];
2304 if (Aval == SC_ZERO)
2307 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2309 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
2311 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2312 LO Bkj = Bcolind[j];
2313 LO Cij = Bcol2Ccol[Bkj];
2315 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2317 c_status[Cij] = CSR_ip;
2318 Ccolind[CSR_ip] = Cij;
2319 Cvals[CSR_ip] = - omega * Dval* Aval * Bvals[j];
2323 Cvals[c_status[Cij]] -= omega * Dval* Aval * Bvals[j];
2329 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
2330 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2331 LO Ikj = Icolind[j];
2332 LO Cij = Icol2Ccol[Ikj];
2334 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2336 c_status[Cij] = CSR_ip;
2337 Ccolind[CSR_ip] = Cij;
2338 Cvals[CSR_ip] = - omega * Dval* Aval * Ivals[j];
2341 Cvals[c_status[Cij]] -= omega * Dval* Aval * Ivals[j];
2348 if (CSR_ip + n > CSR_alloc) {
2350 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
2351 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
2356 Crowptr[m] = CSR_ip;
2359 Cvals_RCP .resize(CSR_ip);
2360 Ccolind_RCP.resize(CSR_ip);
2363 #ifdef HAVE_TPETRA_MMM_TIMINGS 2364 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix Final Sort"))));
2377 Import_Util::sortCrsEntries(Crowptr_RCP(), Ccolind_RCP(), Cvals_RCP());
2381 #ifdef HAVE_TPETRA_MMM_TIMINGS 2382 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix ESFC"))));
2393 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2394 labelList->set(
"Timer Label",label);
2395 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2396 RCP<const Export<LO,GO,NO> > dummyExport;
2402 template<
class Scalar,
2404 class GlobalOrdinal,
2406 void jacobi_A_B_reuse(
2412 const std::string& label,
2413 const Teuchos::RCP<Teuchos::ParameterList>& params)
2415 using Teuchos::Array;
2416 using Teuchos::ArrayRCP;
2417 using Teuchos::ArrayView;
2422 typedef LocalOrdinal LO;
2423 typedef GlobalOrdinal GO;
2429 #ifdef HAVE_TPETRA_MMM_TIMINGS 2430 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2431 using Teuchos::TimeMonitor;
2432 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse Cmap"))));
2434 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2435 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2438 RCP<const import_type> Cimport = C.
getGraph()->getImporter();
2439 RCP<const map_type> Ccolmap = C.
getColMap();
2441 Array<LO> Bcol2Ccol(Bview.
colMap->getNodeNumElements()), Icol2Ccol;
2447 for (
size_t i = 0; i < Bview.
colMap->getNodeNumElements(); i++)
2448 Bcol2Ccol[i] = Teuchos::as<LO>(i);
2451 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()),
2452 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2455 Icol2Ccol.resize(Bview.
importMatrix->getColMap()->getNodeNumElements());
2456 ArrayView<const GO> Bgid = Bview.
origMatrix->getColMap()->getNodeElementList();
2457 ArrayView<const GO> Igid = Bview.
importMatrix->getColMap()->getNodeElementList();
2459 for (
size_t i = 0; i < Bview.
origMatrix->getColMap()->getNodeNumElements(); i++)
2460 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
2461 for (
size_t i = 0; i < Bview.
importMatrix->getColMap()->getNodeNumElements(); i++)
2462 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
2465 #ifdef HAVE_TPETRA_MMM_TIMINGS 2466 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore"))));
2470 size_t m = Aview.
origMatrix->getNodeNumRows();
2471 size_t n = Ccolmap->getNodeNumElements();
2474 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP, Crowptr_RCP;
2475 ArrayRCP<const LO> Acolind_RCP, Bcolind_RCP, Icolind_RCP, Ccolind_RCP;
2476 ArrayRCP<const SC> Avals_RCP, Bvals_RCP, Ivals_RCP, Cvals_RCP, Dvals_RCP;
2478 Aview.
origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
2479 Bview.
origMatrix->getAllValues(Browptr_RCP, Bcolind_RCP, Bvals_RCP);
2481 Bview.
importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
2482 C.getAllValues(Crowptr_RCP, Ccolind_RCP, Cvals_RCP);
2486 ArrayView<const size_t> Arowptr, Browptr, Irowptr, Crowptr;
2487 ArrayView<const LO> Acolind, Bcolind, Icolind, Ccolind;
2488 ArrayView<const SC> Avals, Bvals, Ivals;
2489 ArrayView<SC> Cvals;
2490 ArrayView<const SC> Dvals;
2491 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
2492 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
2494 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
2496 Crowptr = Crowptr_RCP(); Ccolind = Ccolind_RCP(); Cvals = (Teuchos::arcp_const_cast<SC>(Cvals_RCP))();
2497 Dvals = Dvals_RCP();
2500 Array<LO> targetMapToOrigRow (Aview.
colMap->getNodeNumElements(), LO_INVALID);
2501 Array<LO> targetMapToImportRow(Aview.
colMap->getNodeNumElements(), LO_INVALID);
2503 for (LO i = Aview.
colMap->getMinLocalIndex(); i <= Aview.
colMap->getMaxLocalIndex(); i++) {
2504 LO B_LID = Bview.
origMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
2505 if (B_LID != LO_INVALID) {
2506 targetMapToOrigRow[i] = B_LID;
2508 LO I_LID = Bview.
importMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
2509 targetMapToImportRow[i] = I_LID;
2513 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2520 Array<size_t> c_status(n, ST_INVALID);
2523 size_t CSR_ip = 0, OLD_ip = 0;
2524 for (
size_t i = 0; i < m; i++) {
2528 OLD_ip = Crowptr[i];
2529 CSR_ip = Crowptr[i+1];
2530 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2531 c_status[Ccolind[k]] = k;
2540 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2541 Scalar Bval = Bvals[j];
2542 if (Bval == SC_ZERO)
2544 LO Bij = Bcolind[j];
2545 LO Cij = Bcol2Ccol[Bij];
2547 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2548 std::runtime_error,
"Trying to insert a new entry into a static graph");
2550 Cvals[c_status[Cij]] = Bvals[j];
2554 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2555 LO Aik = Acolind[k];
2556 const SC Aval = Avals[k];
2557 if (Aval == SC_ZERO)
2560 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2562 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
2564 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2565 LO Bkj = Bcolind[j];
2566 LO Cij = Bcol2Ccol[Bkj];
2568 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2569 std::runtime_error,
"Trying to insert a new entry into a static graph");
2571 Cvals[c_status[Cij]] -= omega * Dval* Aval * Bvals[j];
2576 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
2577 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2578 LO Ikj = Icolind[j];
2579 LO Cij = Icol2Ccol[Ikj];
2581 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2582 std::runtime_error,
"Trying to insert a new entry into a static graph");
2584 Cvals[c_status[Cij]] -= omega * Dval* Aval * Ivals[j];
2594 template<
class Scalar,
2596 class GlobalOrdinal,
2598 void import_and_extract_views(
2603 bool userAssertsThereAreNoRemotes,
2604 const std::string& label,
2605 const Teuchos::RCP<Teuchos::ParameterList>& params)
2607 using Teuchos::Array;
2608 using Teuchos::ArrayView;
2611 using Teuchos::null;
2614 typedef LocalOrdinal LO;
2615 typedef GlobalOrdinal GO;
2622 #ifdef HAVE_TPETRA_MMM_TIMINGS 2623 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2624 using Teuchos::TimeMonitor;
2625 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Alloc"))));
2633 Aview.deleteContents();
2637 Aview.
rowMap = targetMap;
2643 if (userAssertsThereAreNoRemotes)
2646 RCP<const import_type> importer;
2647 if (params != null && params->isParameter(
"importer")) {
2648 importer = params->get<RCP<const import_type> >(
"importer");
2651 #ifdef HAVE_TPETRA_MMM_TIMINGS 2652 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap"))));
2657 RCP<const map_type> rowMap = A.
getRowMap(), remoteRowMap;
2658 size_t numRemote = 0;
2660 if (!prototypeImporter.is_null() &&
2661 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
2662 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
2664 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
2665 numRemote = prototypeImporter->getNumRemoteIDs();
2667 Array<GO> remoteRows(numRemote);
2668 for (
size_t i = 0; i < numRemote; i++)
2669 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
2671 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
2672 rowMap->getIndexBase(), rowMap->getComm(), rowMap->getNode()));
2675 }
else if (prototypeImporter.is_null()) {
2677 ArrayView<const GO> rows = targetMap->getNodeElementList();
2678 size_t numRows = targetMap->getNodeNumElements();
2680 Array<GO> remoteRows(numRows);
2681 for(
size_t i = 0; i < numRows; ++i) {
2682 const LO mlid = rowMap->getLocalElement(rows[i]);
2684 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
2685 remoteRows[numRemote++] = rows[i];
2687 remoteRows.resize(numRemote);
2688 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
2689 rowMap->getIndexBase(), rowMap->getComm(), rowMap->getNode()));
2697 const int numProcs = rowMap->getComm()->getSize();
2699 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
2700 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
2709 #ifdef HAVE_TPETRA_MMM_TIMINGS 2710 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Collective-0"))));
2714 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
2716 if (globalMaxNumRemote > 0) {
2717 #ifdef HAVE_TPETRA_MMM_TIMINGS 2718 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-2"))));
2722 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
2724 importer = rcp(
new import_type(rowMap, remoteRowMap));
2726 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
2730 params->set(
"importer", importer);
2733 if (importer != null) {
2734 #ifdef HAVE_TPETRA_MMM_TIMINGS 2735 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-3"))));
2739 Teuchos::ParameterList labelList;
2740 labelList.set(
"Timer Label", label);
2742 if(!params.is_null())
2743 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
2744 Aview.
importMatrix = Tpetra::importAndFillCompleteCrsMatrix<crs_matrix_type>(rcpFromRef(A), *importer,
2745 A.
getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
2747 #ifdef HAVE_TPETRA_MMM_STATISTICS 2748 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
2752 #ifdef HAVE_TPETRA_MMM_TIMINGS 2753 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-4"))));
2772 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \ 2775 void MatrixMatrix::Multiply( \ 2776 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 2778 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 2780 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 2781 bool call_FillComplete_on_result, \ 2782 const std::string & label, \ 2783 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 2786 void MatrixMatrix::Jacobi( \ 2788 const Vector< SCALAR, LO, GO, NODE > & Dinv, \ 2789 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 2790 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 2791 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 2792 bool call_FillComplete_on_result, \ 2793 const std::string & label, \ 2794 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 2797 void MatrixMatrix::Add( \ 2798 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 2801 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 2804 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \ 2807 void MatrixMatrix::Add( \ 2808 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \ 2811 CrsMatrix<SCALAR, LO, GO, NODE>& B, \ 2815 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \ 2816 MatrixMatrix::add (const SCALAR & alpha, \ 2817 const bool transposeA, \ 2818 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 2819 const SCALAR & beta, \ 2820 const bool transposeB, \ 2821 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 2822 const Teuchos::RCP<const Map< LO , GO , NODE > >& domainMap, \ 2823 const Teuchos::RCP<const Map< LO , GO , NODE > >& rangeMap, \ 2824 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 2828 #endif // TPETRA_MATRIXMATRIX_DEF_HPP ProfileType getProfileType() const
Returns true if the matrix was allocated with static data structures.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > importColMap
Colmap garnered as a result of the import.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
size_t getNodeMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const
This matrix's graph, as a RowGraph.
bool isFillActive() const
Whether the matrix is not fill complete.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Teuchos::RCP< const map_type > domainMap
Domain map for original matrix.
Teuchos::RCP< const map_type > getRowMap() const
The Map that describes the row distribution in this matrix.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
size_t getNumReceives() const
The number of processes from which we will receive data.
size_t getNumSends() const
The number of processes to which we will send data.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
size_t global_size_t
Global size_t object.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
global_size_t getGlobalNumRows() const
Number of global elements in the row map of this matrix.
bool isFillComplete() const
Whether the matrix is fill complete.
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
void keyValueMerge(KeyInputIterType keyBeg1, KeyInputIterType keyEnd1, ValueInputIterType valBeg1, ValueInputIterType valEnd1, KeyInputIterType keyBeg2, KeyInputIterType keyEnd2, ValueInputIterType valBeg2, ValueInputIterType valEnd2, KeyOutputIterType keyOut, ValueOutputIterType valOut, BinaryFunction f)
Merge two sorted (by keys) sequences of unique (key,value) pairs by combining pairs with equal keys...
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > importMatrix
The imported matrix.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Sets up and executes a communication plan for a Tpetra DistObject.
void setAllValues(const typename local_matrix_type::row_map_type &ptr, const typename local_graph_type::entries_type::non_const_type &ind, const typename local_matrix_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
Teuchos::ArrayRCP< const Scalar > getData() const
Const view of the local values of this vector.
Utility functions for packing and unpacking sparse matrix entries.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
size_t getNodeNumRows() const
The number of matrix rows owned by the calling process.
A parallel distribution of indices over processes.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
A read-only, row-oriented interface to a sparse matrix.
A distributed dense vector.
Stand-alone utility functions and macros.
Teuchos::RCP< const map_type > rowMap
Desired row map for "imported" version of the matrix.
void getLastDoStatistics(size_t &bytes_sent, size_t &bytes_recvd) const
Information on the last call to do/doReverse.
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
Teuchos::RCP< const map_type > getDomainMap() const
The domain Map of this matrix.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
Teuchos::RCP< const map_type > getRangeMap() const
The range Map of this matrix.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
Teuchos::RCP< const map_type > origRowMap
Original row map of matrix.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.