#ifndef __TPETRA_DISTRIBUTORLOWERTRIANGULARBLOCK_HPP
#define __TPETRA_DISTRIBUTORLOWERTRIANGULARBLOCK_HPP

#include "Tpetra_Distributor.hpp"
#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_Export.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
template <typename gno_t, typename scalar_t>
class DistributionLowerTriangularBlock : public Distribution<gno_t,scalar_t> {
public:
  using Distribution<gno_t,scalar_t>::me;
  using Distribution<gno_t,scalar_t>::np;
  using Distribution<gno_t,scalar_t>::comm;
  using Distribution<gno_t,scalar_t>::nrows;
  using typename Distribution<gno_t,scalar_t>::NZindex_t;
  using typename Distribution<gno_t,scalar_t>::LocalNZmap_t;
  DistributionLowerTriangularBlock(size_t nrows_,
    const Teuchos::RCP<const Teuchos::Comm<int> > &comm_,
    const Teuchos::ParameterList &params) :
    Distribution<gno_t,scalar_t>(nrows_, comm_, params),
    initialDist(nrows_, comm_, params),
    sortByDegree(false), permMatrix(Teuchos::null),
    redistributed(false), chunksComputed(false), nChunks(0)
  {
    // The number of processors np must be a triangular number:
    // np = q(q+1)/2, where q is the number of chunks per dimension.
    int npnp = 2 * np;
    nChunks = int(std::sqrt(float(npnp)));
    while (nChunks * (nChunks + 1) < npnp) nChunks++;

    TEUCHOS_TEST_FOR_EXCEPTION(nChunks * (nChunks+1) != npnp, std::logic_error,
                               "Number of processors np = " << np <<
                               " must satisfy np = q(q+1)/2 for some q" <<
                               " for LowerTriangularBlock distribution");
    nChunksPerRow = double(nChunks) / double(nrows);

    const Teuchos::ParameterEntry *pe = params.getEntryPtr("sortByDegree");
    if (pe != NULL) sortByDegree = pe->getValue<bool>(&sortByDegree);

    pe = params.getEntryPtr("readPerProcess");
    if (pe != NULL) redistributed = pe->getValue<bool>(&redistributed);

    if (me == 0)
      std::cout << "\n LowerTriangularBlock Distribution: "
                << "\n     nChunks = " << nChunks
                << std::endl;
  }
  enum DistributionType DistType() { return LowerTriangularBlock; }

  bool areChunksComputed() { return chunksComputed; }
  Teuchos::Array<gno_t> getChunkCuts() {
    if (chunksComputed) return chunkCuts;
    throw std::runtime_error("Error: Requested chunk cuts have not been computed yet.");
  }
  inline bool VecMine(gno_t i) { return initialDist.VecMine(i); }
  // Nonzero (i,j) ownership: before the nonzeros have been redistributed,
  // defer to the initial 1D distribution; afterwards use the chunk layout.
  bool Mine(gno_t i, gno_t j) {
    if (redistributed) {
      if (j > i) return false;  // only the lower triangle is stored
      else return (procFromChunks(i,j) == me);
    }
    else
      return initialDist.Mine(i,j);
  }
  inline bool Mine(gno_t i, gno_t j, int p) { return Mine(i,j); }
  // Move the locally stored nonzeros from the initial 1D linear distribution
  // into the lower-triangular block (chunk) distribution.
  void Redistribute(LocalNZmap_t &localNZ)
  {
    gno_t myFirstRow = initialDist.getFirstRow(me);
    gno_t nMyRows = initialDist.getNumRow(me);
    Teuchos::Array<gno_t> nnzPerRow(nMyRows, 0);

    // Receive counts and displacements for gathering per-row counts to rank 0
    Teuchos::Array<int> rcvcnt(np);
    Teuchos::Array<int> disp(np);
    for (int sum = 0, p = 0; p < np; p++) {
      int prows = initialDist.getNumRow(p);
      rcvcnt[p] = prows;
      disp[p] = sum;
      sum += prows;
    }
    Teuchos::Array<gno_t> permuteIndex;
    Teuchos::Array<gno_t> sortedOrder;

    Teuchos::Array<gno_t> globalRowBuf;
    globalRowBuf.resize(nrows, 0);

    if (sortByDegree) {
      // Count the nonzeros in each locally owned row (the row degree)
      for (auto it = localNZ.begin(); it != localNZ.end(); it++) {
        gno_t I = it->first.first;
        nnzPerRow[I-myFirstRow]++;
      }

      // Gather all row degrees to rank 0
      Teuchos::gatherv<int,gno_t>(nnzPerRow.getRawPtr(), nMyRows,
                                  globalRowBuf.getRawPtr(),
                                  rcvcnt.getRawPtr(), disp.getRawPtr(),
                                  0, *comm);
      permuteIndex.resize(nrows);
      sortedOrder.resize(nrows);

      // Rank 0 sorts the rows by decreasing degree and builds the
      // permutation; the result is then broadcast to all ranks.
      if (me == 0) {
        for (size_t i = 0; i != nrows; i++) sortedOrder[i] = i;

        std::sort(sortedOrder.begin(), sortedOrder.end(),
                  [&](const size_t& a, const size_t& b) {
                    return (globalRowBuf[a] > globalRowBuf[b]);
                  });

        // Invert the sorted order:  new index = permuteIndex[old index]
        for (size_t i = 0; i < nrows; i++) {
          permuteIndex[sortedOrder[i]] = i;
        }
      }

      Teuchos::broadcast<int,gno_t>(*comm, 0, permuteIndex(0,nrows));
      // Build an explicit permutation matrix so the original ordering can be
      // recovered later (e.g., in LowerTriangularBlockOperator::apply)
      Teuchos::Array<gno_t> myRows;
      for (size_t i = 0; i < nrows; i++) {
        if (VecMine(i)) myRows.push_back(i);
      }

      Tpetra::global_size_t dummy =
        Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
      Teuchos::RCP<const map_t> permMap =
        rcp(new map_t(dummy, myRows(), 0, comm));

      permMatrix = rcp(new matrix_t(permMap, 1));  // one nonzero per row

      Teuchos::Array<gno_t> cols(1);
      Teuchos::Array<scalar_t> vals(1); vals[0] = 1.;

      for (size_t i = 0; i < permMap->getNodeNumElements(); i++) {
        gno_t gid = permMap->getGlobalElement(i);
        cols[0] = permuteIndex[gid];
        permMatrix->insertGlobalValues(gid, cols(), vals());
      }

      permMatrix->fillComplete(permMap, permMap);
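      // Illustrative example (values are hypothetical): if permuteIndex is
      // {2, 0, 1}, then permMatrix has its single unit entry in column 2 of
      // row 0, column 0 of row 1, and column 1 of row 2; applying it with
      // TRANS maps a vector from the original ordering to the sorted one,
      // and NO_TRANS maps it back.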
    }

    // Recount nonzeros per row, applying the permutation (if any) and keeping
    // only lower-triangular entries; these counts drive the chunk cuts below.
    nnzPerRow.assign(nMyRows, 0);
    gno_t nnz = 0;
    for (auto it = localNZ.begin(); it != localNZ.end(); it++) {
      gno_t I = (sortByDegree ? permuteIndex[it->first.first]
                              : it->first.first);
      gno_t J = (sortByDegree ? permuteIndex[it->first.second]
                              : it->first.second);
      if (J <= I) {
        nnzPerRow[it->first.first - myFirstRow]++;
        nnz++;
      }
    }

    Teuchos::gatherv<int,gno_t>(nnzPerRow.getRawPtr(), nMyRows,
                                globalRowBuf.getRawPtr(),
                                rcvcnt.getRawPtr(), disp.getRawPtr(),
                                0, *comm);
    Teuchos::Array<int>().swap(rcvcnt);
    Teuchos::Array<int>().swap(disp);

    gno_t gNnz = 0;
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &nnz, &gNnz);
    // Compute chunk cuts on rank 0.  Chunk row m contains m+1 processor
    // blocks, so it is assigned roughly (m+1) * (gNnz / np) nonzeros.
    chunkCuts.resize(nChunks+1, 0);

    if (me == 0) {
      gno_t I = 0;
      size_t target = gNnz / np;
      size_t targetRunningTotal = 0;
      size_t currentRunningTotal = 0;

      for (int chunkCnt = 0; chunkCnt < nChunks; chunkCnt++) {
        targetRunningTotal = (target * (chunkCnt+1));
        currentRunningTotal = 0;
        while (I < static_cast<gno_t>(nrows)) {
          size_t nextNnz = (sortByDegree ? globalRowBuf[sortedOrder[I]]
                                         : globalRowBuf[I]);
          if (currentRunningTotal + nextNnz <= targetRunningTotal) {
            currentRunningTotal += nextNnz;
            I++;
          }
          else
            break;
        }
        chunkCuts[chunkCnt+1] = I;
      }
      chunkCuts[nChunks] = static_cast<gno_t>(nrows);
    }

    // Row counts are no longer needed
    Teuchos::Array<gno_t>().swap(globalRowBuf);

    Teuchos::broadcast<int,gno_t>(*comm, 0, chunkCuts(0,nChunks+1));
    chunksComputed = true;
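    // Illustrative example (values are hypothetical): with nrows = 100 and
    // nChunks = 3, chunkCuts might be {0, 30, 65, 100}; chunk row m then
    // covers global rows [chunkCuts[m], chunkCuts[m+1]).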
    // Pack the (possibly permuted) lower-triangular nonzeros together with
    // their destination ranks in the chunk distribution.
    size_t sendCnt = 0;
    Teuchos::Array<gno_t> iOut(localNZ.size());
    Teuchos::Array<gno_t> jOut(localNZ.size());
    Teuchos::Array<scalar_t> vOut(localNZ.size());
    Teuchos::Array<int> pOut(localNZ.size());

    for (auto it = localNZ.begin(); it != localNZ.end(); it++) {
      iOut[sendCnt] = (sortByDegree ? permuteIndex[it->first.first]
                                    : it->first.first);
      jOut[sendCnt] = (sortByDegree ? permuteIndex[it->first.second]
                                    : it->first.second);
      if (jOut[sendCnt] <= iOut[sendCnt]) {  // keep the lower triangle only
        vOut[sendCnt] = it->second;
        pOut[sendCnt] = procFromChunks(iOut[sendCnt], jOut[sendCnt]);
        sendCnt++;
      }
    }

    // Free memory that is no longer needed on this rank
    LocalNZmap_t().swap(localNZ);
    if (sortByDegree) Teuchos::Array<gno_t>().swap(permuteIndex);
    // Ship each nonzero to its new owner with a Tpetra::Distributor
    Tpetra::Distributor plan(comm);
    size_t nrecvs = plan.createFromSends(pOut(0,sendCnt));

    Teuchos::Array<gno_t> iIn(nrecvs);
    Teuchos::Array<gno_t> jIn(nrecvs);
    Teuchos::Array<scalar_t> vIn(nrecvs);

    plan.doPostsAndWaits<gno_t>(iOut(0,sendCnt), 1, iIn());
    plan.doPostsAndWaits<gno_t>(jOut(0,sendCnt), 1, jIn());
    plan.doPostsAndWaits<scalar_t>(vOut(0,sendCnt), 1, vIn());

    // Rebuild the local nonzero map from the received entries
    for (size_t n = 0; n < nrecvs; n++) {
      NZindex_t nz(iIn[n], jIn[n]);
      localNZ[nz] = vIn[n];
    }

    redistributed = true;
  }
  Teuchos::RCP<matrix_t> getPermutationMatrix() const { return permMatrix; }
private:
  // The 1D linear (row-block) distribution in which the matrix is first read
  Distribution1DLinear<gno_t,scalar_t> initialDist;

  bool sortByDegree;                  // sort rows by decreasing degree?
  Teuchos::RCP<matrix_t> permMatrix;  // permutation matrix (null unless sorting)
  bool redistributed;                 // have the nonzeros been redistributed?
  bool chunksComputed;                // have the chunk cuts been computed?

  int nChunks;                        // number of chunks q, with np = q(q+1)/2
  double nChunksPerRow;               // nChunks / nrows; initial guess for a row's chunk
  Teuchos::Array<gno_t> chunkCuts;    // chunk m covers rows [chunkCuts[m], chunkCuts[m+1])
  // Return the chunk index containing row (or column) I
  int findIdxInChunks(gno_t I) {
    int m = I * nChunksPerRow;  // initial guess, then correct it
    while (I < chunkCuts[m]) m--;
    while (I >= chunkCuts[m+1]) m++;
    return m;
  }
  // Rank owning nonzero (I,J): chunk row m, chunk column n -> rank m(m+1)/2 + n
  int procFromChunks(gno_t I, gno_t J) {
    int m = findIdxInChunks(I);
    int n = findIdxInChunks(J);
    int p = m*(m+1)/2 + n;
    return p;
  }
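  // Illustrative layout (not from the original source): with np = 6 and
  // nChunks = 3, the lower-triangular blocks map to ranks as
  //
  //   chunk col:      0   1   2
  //   chunk row 0:    0
  //   chunk row 1:    1   2
  //   chunk row 2:    3   4   5
  //
  // so a nonzero whose row falls in chunk 2 and whose column falls in
  // chunk 1 belongs to rank 2*(2+1)/2 + 1 = 4.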
};

template <typename scalar_t,
          class Node = ::Tpetra::Details::DefaultTypes::node_type>
class LowerTriangularBlockOperator :
  public Tpetra::Operator<scalar_t, Tpetra::Map<>::local_ordinal_type,
                          Tpetra::Map<>::global_ordinal_type, Node>
{
public:
  using lno_t = Tpetra::Map<>::local_ordinal_type;
  using gno_t = Tpetra::Map<>::global_ordinal_type;
  using map_t = Tpetra::Map<lno_t, gno_t, Node>;
  using matrix_t = Tpetra::CrsMatrix<scalar_t, lno_t, gno_t, Node>;
  using vector_t = Tpetra::Vector<scalar_t, lno_t, gno_t, Node>;
  using mvector_t = Tpetra::MultiVector<scalar_t, lno_t, gno_t, Node>;
  using dist_t = Tpetra::DistributionLowerTriangularBlock<gno_t, scalar_t>;
  LowerTriangularBlockOperator(
    const Teuchos::RCP<const matrix_t> &lowerTriangularMatrix_,
    const dist_t &dist)
  : lowerTriangularMatrix(lowerTriangularMatrix_),
    permMatrix(dist.getPermutationMatrix())
  {
    // The implicit symmetric apply below assumes domain map == range map
    TEUCHOS_TEST_FOR_EXCEPTION(
      !lowerTriangularMatrix->getRangeMap()->isSameAs(
        *lowerTriangularMatrix->getDomainMap()),
      std::logic_error,
      "The Domain and Range maps of the LowerTriangularBlock matrix "
      "must be the same");
    // Extract the diagonal (in the row map) and export it into the range map,
    // where it is needed during apply()
    vector_t diagByRowMap(lowerTriangularMatrix->getRowMap());
    lowerTriangularMatrix->getLocalDiagCopy(diagByRowMap);
    diag = Teuchos::rcp(new vector_t(lowerTriangularMatrix->getRangeMap()));
    Tpetra::Export<lno_t, gno_t, Node> exporter(
      lowerTriangularMatrix->getRowMap(),
      lowerTriangularMatrix->getRangeMap());
    diag->doExport(diagByRowMap, exporter, Tpetra::ADD);
  }
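  // apply() below realizes the full symmetric matrix implicitly: writing L
  // for the stored lower triangle and D = diag(L), the symmetric matrix is
  // A = L + L^T - D (the diagonal would otherwise be counted twice), so
  //
  //   y = beta*y + alpha*A*x
  //     = beta*y + alpha*L*x + alpha*L^T*x - alpha*(D .* x)
  //
  // which is exactly the sequence of three sub-applies in each branch below.
  // Because the operator is symmetric, the ETransp mode can be ignored.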
  void apply(const mvector_t &x, mvector_t &y, Teuchos::ETransp mode,
             scalar_t alpha, scalar_t beta) const
  {
    scalar_t ZERO = Teuchos::ScalarTraits<scalar_t>::zero();
    scalar_t ONE = Teuchos::ScalarTraits<scalar_t>::one();

    if (beta == ZERO) y.putScalar(ZERO);

    if (permMatrix == Teuchos::null) {
      // No row permutation was applied; operate directly on x and y
      lowerTriangularMatrix->apply(x, y, Teuchos::NO_TRANS, alpha, beta);
      lowerTriangularMatrix->apply(x, y, Teuchos::TRANS, alpha, ONE);
      y.elementWiseMultiply(-alpha, *diag, x, ONE);
    }
    else {
      // Rows/columns were permuted during redistribution: permute x into the
      // matrix ordering, apply, then permute the result back
      mvector_t xtmp(x.getMap(), x.getNumVectors());
      mvector_t ytmp(y.getMap(), y.getNumVectors());

      permMatrix->apply(x, xtmp, Teuchos::TRANS);
      if (beta != ZERO) permMatrix->apply(y, ytmp, Teuchos::TRANS);

      lowerTriangularMatrix->apply(xtmp, ytmp, Teuchos::NO_TRANS, alpha, beta);
      lowerTriangularMatrix->apply(xtmp, ytmp, Teuchos::TRANS, alpha, ONE);
      ytmp.elementWiseMultiply(-alpha, *diag, xtmp, ONE);

      permMatrix->apply(ytmp, y, Teuchos::NO_TRANS);
    }
  }
  Teuchos::RCP<const map_t> getDomainMap() const {
    return lowerTriangularMatrix->getDomainMap();
  }

  Teuchos::RCP<const map_t> getRangeMap() const {
    return lowerTriangularMatrix->getRangeMap();
  }

  bool hasTransposeApply() const { return true; }
private:
  const Teuchos::RCP<const matrix_t> lowerTriangularMatrix;
  const Teuchos::RCP<const matrix_t> permMatrix;
  Teuchos::RCP<vector_t> diag;  // diagonal of the matrix, in the range map
};
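// Usage sketch (hypothetical; the surrounding setup is not part of this
// header): given an RCP to a Tpetra::CrsMatrix L holding only the lower
// triangle of a symmetric matrix, and the DistributionLowerTriangularBlock
// dist used to distribute it, the operator applies the full matrix without
// ever forming it explicitly:
//
//   LowerTriangularBlockOperator<double> opA(L, dist);
//   opA.apply(x, y, Teuchos::NO_TRANS, 1.0, 0.0);  // y = (L + L^T - diag(L)) * x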