Tpetra parallel linear algebra  Version of the Day
Tpetra_BlockCrsMatrix_Helpers_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
41 #define TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
42 
44 
45 #include "Tpetra_BlockCrsMatrix.hpp"
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_HashTable.hpp"
48 #include "Tpetra_Import.hpp"
49 #include "Tpetra_Map.hpp"
50 #include "Tpetra_MultiVector.hpp"
51 #include "Teuchos_ParameterList.hpp"
52 #include "Teuchos_ScalarTraits.hpp"
53 #include <ctime>
54 #include <fstream>
55 
56 namespace Tpetra {
57 
58  template<class Scalar, class LO, class GO, class Node>
59  void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName) {
60  Teuchos::ParameterList pl;
61  std::ofstream out;
62  out.open(fileName.c_str());
63  blockCrsMatrixWriter(A, out, pl);
64  }
65 
66  template<class Scalar, class LO, class GO, class Node>
67  void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName, Teuchos::ParameterList const &params) {
68  std::ofstream out;
69  out.open(fileName.c_str());
70  blockCrsMatrixWriter(A, out, params);
71  }
72 
73  template<class Scalar, class LO, class GO, class Node>
74  void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os) {
75  Teuchos::ParameterList pl;
76  blockCrsMatrixWriter(A, os, pl);
77  }
78 
79  template<class Scalar, class LO, class GO, class Node>
80  void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
81 
82  using Teuchos::RCP;
83  using Teuchos::rcp;
84 
85  typedef Teuchos::OrdinalTraits<GO> TOT;
86  typedef BlockCrsMatrix<Scalar, LO, GO, Node> block_crs_matrix_type;
87  typedef Tpetra::Import<LO, GO, Node> import_type;
88  typedef Tpetra::Map<LO, GO, Node> map_type;
90  typedef Tpetra::CrsGraph<LO, GO, Node> crs_graph_type;
91 
92  RCP<const map_type> const rowMap = A.getRowMap(); //"mesh" map
93  RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
94  const int myRank = comm->getRank();
95  const size_t numProcs = comm->getSize();
96 
97  // If true, force use of the import strip-mining infrastructure. This is useful for debugging on one process.
98  bool alwaysUseParallelAlgorithm = false;
99  if (params.isParameter("always use parallel algorithm"))
100  alwaysUseParallelAlgorithm = params.get<bool>("always use parallel algorithm");
101  bool printMatrixMarketHeader = true;
102  if (params.isParameter("print MatrixMarket header"))
103  printMatrixMarketHeader = params.get<bool>("print MatrixMarket header");
104 
105  if (printMatrixMarketHeader && myRank==0) {
106  std::time_t now = std::time(NULL);
107 
108  const std::string dataTypeStr =
109  Teuchos::ScalarTraits<Scalar>::isComplex ? "complex" : "real";
110 
111  // Explanation of first line of file:
112  // - "%%MatrixMarket" is the tag for Matrix Market format.
113  // - "matrix" is what we're printing.
114  // - "coordinate" means sparse (triplet format), rather than dense.
115  // - "real" / "complex" is the type (in an output format sense,
116  // not in a C++ sense) of each value of the matrix. (The
117  // other two possibilities are "integer" (not really necessary
118  // here) and "pattern" (no values, just graph).)
119  os << "%%MatrixMarket matrix coordinate " << dataTypeStr << " general" << std::endl;
120  os << "% time stamp: " << ctime(&now);
121  os << "% written from " << numProcs << " processes" << std::endl;
122  os << "% point representation of Tpetra::BlockCrsMatrix" << std::endl;
123  size_t numRows = A.getGlobalNumRows();
124  size_t numCols = A.getGlobalNumCols();
125  os << "% " << numRows << " block rows, " << numCols << " block columns" << std::endl;
126  const LO blockSize = A.getBlockSize();
127  os << "% block size " << blockSize << std::endl;
128  os << numRows*blockSize << " " << numCols*blockSize << " " << A.getGlobalNumEntries()*blockSize*blockSize << std::endl;
129  }
130 
131  if (numProcs==1 && !alwaysUseParallelAlgorithm) {
132  writeMatrixStrip(A,os,params);
133  } else {
134  size_t numRows = rowMap->getNodeNumElements();
135 
136  //Create source map
137  RCP<const map_type> allMeshGidsMap = rcp(new map_type(TOT::invalid(), numRows, A.getIndexBase(), comm));
138  //Create and populate vector of mesh GIDs corresponding to this pid's rows.
139  //This vector will be imported one pid's worth of information at a time to pid 0.
140  mv_type allMeshGids(allMeshGidsMap,1);
141  Teuchos::ArrayRCP<GO> allMeshGidsData = allMeshGids.getDataNonConst(0);
142 
143  for (size_t i=0; i<numRows; i++)
144  allMeshGidsData[i] = rowMap->getGlobalElement(i);
145  allMeshGidsData = Teuchos::null;
146 
147  // Now construct a RowMatrix on PE 0 by strip-mining the rows of the input matrix A.
148  size_t stripSize = allMeshGids.getGlobalLength() / numProcs;
149  size_t remainder = allMeshGids.getGlobalLength() % numProcs;
150  size_t curStart = 0;
151  size_t curStripSize = 0;
152  Teuchos::Array<GO> importMeshGidList;
153  for (size_t i=0; i<numProcs; i++) {
154  if (myRank==0) { // Only PE 0 does this part
155  curStripSize = stripSize;
156  if (i<remainder) curStripSize++; // handle leftovers
157  importMeshGidList.resize(curStripSize); // Set size of vector to max needed
158  for (size_t j=0; j<curStripSize; j++) importMeshGidList[j] = j + curStart + A.getIndexBase();
159  curStart += curStripSize;
160  }
161  // The following import map should be non-trivial only on PE 0.
162  TEUCHOS_TEST_FOR_EXCEPTION(myRank>0 && curStripSize!=0,
163  std::runtime_error, "Tpetra::blockCrsMatrixWriter: (pid "
164  << myRank << ") map size should be zero, but is " << curStripSize);
165  RCP<map_type> importMeshGidMap = rcp(new map_type(TOT::invalid(), importMeshGidList(), A.getIndexBase(), comm));
166  import_type gidImporter(allMeshGidsMap, importMeshGidMap);
167  mv_type importMeshGids(importMeshGidMap, 1);
168  importMeshGids.doImport(allMeshGids, gidImporter, INSERT);
169 
170  // importMeshGids now has a list of GIDs for the current strip of matrix rows.
171  // Use these values to build another importer that will get rows of the matrix.
172 
173  // The following import map will be non-trivial only on PE 0.
174  Teuchos::ArrayRCP<const GO> importMeshGidsData = importMeshGids.getData(0);
175  Teuchos::Array<GO> importMeshGidsGO;
176  importMeshGidsGO.reserve(importMeshGidsData.size());
177  for (typename Teuchos::ArrayRCP<const GO>::size_type j=0; j<importMeshGidsData.size(); ++j)
178  importMeshGidsGO.push_back(importMeshGidsData[j]);
179  RCP<const map_type> importMap = rcp(new map_type(TOT::invalid(), importMeshGidsGO(), rowMap->getIndexBase(), comm) );
180 
181  import_type importer(rowMap,importMap );
182  size_t numEntriesPerRow = A.getCrsGraph().getGlobalMaxNumRowEntries();
183  RCP<crs_graph_type> graph = createCrsGraph(importMap,numEntriesPerRow);
184  RCP<const map_type> domainMap = A.getCrsGraph().getDomainMap();
185  graph->doImport(A.getCrsGraph(), importer, INSERT);
186  graph->fillComplete(domainMap, importMap);
187 
188  block_crs_matrix_type importA(*graph, A.getBlockSize());
189  importA.doImport(A, importer, INSERT);
190 
191  // Finally we are ready to write this strip of the matrix
192  writeMatrixStrip(importA, os, params);
193  }
194  }
195  }
196 
197  template<class Scalar, class LO, class GO, class Node>
198  void writeMatrixStrip(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
199  using Teuchos::RCP;
200  using map_type = Tpetra::Map<LO, GO, Node>;
201  using bcrs_type = BlockCrsMatrix<Scalar,LO,GO,Node>;
202  using bcrs_local_inds_host_view_type = typename bcrs_type::local_inds_host_view_type;
203  using bcrs_values_host_view_type = typename bcrs_type::values_host_view_type;
204  using impl_scalar_type = typename bcrs_type::impl_scalar_type;
205 
206  size_t numRows = A.getGlobalNumRows();
207  RCP<const map_type> rowMap = A.getRowMap();
208  RCP<const map_type> colMap = A.getColMap();
209  RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
210  const int myRank = comm->getRank();
211 
212  const size_t meshRowOffset = rowMap->getIndexBase();
213  const size_t meshColOffset = colMap->getIndexBase();
214  TEUCHOS_TEST_FOR_EXCEPTION(meshRowOffset != meshColOffset,
215  std::runtime_error, "Tpetra::writeMatrixStrip: "
216  "mesh row index base != mesh column index base");
217 
218  if (myRank !=0) {
219 
220  TEUCHOS_TEST_FOR_EXCEPTION(A.getNodeNumRows() != 0,
221  std::runtime_error, "Tpetra::writeMatrixStrip: pid "
222  << myRank << " should have 0 rows but has " << A.getNodeNumRows());
223  TEUCHOS_TEST_FOR_EXCEPTION(A.getNodeNumCols() != 0,
224  std::runtime_error, "Tpetra::writeMatrixStrip: pid "
225  << myRank << " should have 0 columns but has " << A.getNodeNumCols());
226 
227  } else {
228 
229  TEUCHOS_TEST_FOR_EXCEPTION(numRows != A.getNodeNumRows(),
230  std::runtime_error, "Tpetra::writeMatrixStrip: "
231  "number of rows on pid 0 does not match global number of rows");
232 
233 
234  int err = 0;
235  const LO blockSize = A.getBlockSize();
236  const size_t numLocalRows = A.getNodeNumRows();
237  bool precisionChanged=false;
238  int oldPrecision = 0; // avoid "unused variable" warning
239  if (params.isParameter("precision")) {
240  oldPrecision = os.precision(params.get<int>("precision"));
241  precisionChanged=true;
242  }
243  int pointOffset = 1;
244  if (params.isParameter("zero-based indexing")) {
245  if (params.get<bool>("zero-based indexing") == true)
246  pointOffset = 0;
247  }
248 
249  size_t localRowInd;
250  for (localRowInd = 0; localRowInd < numLocalRows; ++localRowInd) {
251 
252  // Get a view of the current row.
253  bcrs_local_inds_host_view_type localColInds;
254  bcrs_values_host_view_type vals;
255  LO numEntries;
256  A.getLocalRowView (localRowInd, localColInds, vals); numEntries = localColInds.extent(0);
257  GO globalMeshRowID = rowMap->getGlobalElement(localRowInd) - meshRowOffset;
258 
259  for (LO k = 0; k < numEntries; ++k) {
260  GO globalMeshColID = colMap->getGlobalElement(localColInds[k]) - meshColOffset;
261  const impl_scalar_type* curBlock = vals.data() + blockSize * blockSize * k;
262  // Blocks are stored in row-major format.
263  for (LO j = 0; j < blockSize; ++j) {
264  GO globalPointRowID = globalMeshRowID * blockSize + j + pointOffset;
265  for (LO i = 0; i < blockSize; ++i) {
266  GO globalPointColID = globalMeshColID * blockSize + i + pointOffset;
267  const impl_scalar_type curVal = curBlock[i + j * blockSize];
268 
269  os << globalPointRowID << " " << globalPointColID << " ";
270  if (Teuchos::ScalarTraits<impl_scalar_type>::isComplex) {
271  // Matrix Market format wants complex values to be
272  // written as space-delimited pairs. See Bug 6469.
273  os << Teuchos::ScalarTraits<impl_scalar_type>::real (curVal) << " "
274  << Teuchos::ScalarTraits<impl_scalar_type>::imag (curVal);
275  }
276  else {
277  os << curVal;
278  }
279  os << std::endl;
280  }
281  }
282  }
283  }
284  if (precisionChanged)
285  os.precision(oldPrecision);
286  TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
287  std::runtime_error, "Tpetra::writeMatrixStrip: "
288  "error getting view of local row " << localRowInd);
289 
290  }
291 
292  }
293 
294  template<class Scalar, class LO, class GO, class Node>
295  Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >
296  convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
297  {
298 
299  /*
300  ASSUMPTIONS:
301 
302  1) In point matrix, all entries associated with a little block are present (even if they are zero).
303  2) For given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
304  3) Point column map and block column map are ordered consistently.
305  */
306 
307  using Teuchos::Array;
308  using Teuchos::ArrayView;
309  using Teuchos::RCP;
310 
311  typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
312  typedef Tpetra::Map<LO,GO,Node> map_type;
313  typedef Tpetra::CrsGraph<LO,GO,Node> crs_graph_type;
314  typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
315 
316  const map_type &pointRowMap = *(pointMatrix.getRowMap());
317  RCP<const map_type> meshRowMap = createMeshMap<LO,GO,Node>(blockSize, pointRowMap);
318 
319  const map_type &pointColMap = *(pointMatrix.getColMap());
320  RCP<const map_type> meshColMap = createMeshMap<LO,GO,Node>(blockSize, pointColMap);
321 
322  const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
323  RCP<const map_type> meshDomainMap = createMeshMap<LO,GO,Node>(blockSize, pointDomainMap);
324 
325  const map_type &pointRangeMap = *(pointMatrix.getRangeMap());
326  RCP<const map_type> meshRangeMap = createMeshMap<LO,GO,Node>(blockSize, pointRangeMap);
327 
328  // Use graph ctor that provides column map and upper bound on nonzeros per row.
329  // We can use static profile because the point graph should have at least as many entries per
330  // row as the mesh graph.
331  RCP<crs_graph_type> meshCrsGraph = rcp(new crs_graph_type(meshRowMap, meshColMap,
332  pointMatrix.getGlobalMaxNumRowEntries(), Tpetra::StaticProfile));
333  // Fill the graph by walking through the matrix. For each mesh row, we query the collection of point
334  // rows associated with it. The point column ids are converted to mesh column ids and put into an array.
335  // As each point row collection is finished, the mesh column ids are sorted, made unique, and inserted
336  // into the mesh graph.
337  typename crs_matrix_type::local_inds_host_view_type pointColInds;
338  typename crs_matrix_type::values_host_view_type pointVals;
339  Array<GO> meshColGids;
340  meshColGids.reserve(pointMatrix.getGlobalMaxNumRowEntries());
341  //again, I assume that point GIDs associated with a mesh GID are consecutive.
342  //if they are not, this will break!!
343  for (size_t i=0; i<pointMatrix.getNodeNumRows()/blockSize; i++) {
344  for (int j=0; j<blockSize; ++j) {
345  LO rowLid = i*blockSize+j;
346  pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals); //TODO optimization: Since I don't care about values,
347  //TODO I should use the graph instead.
348  for (size_t k=0; k<pointColInds.size(); ++k) {
349  GO meshColInd = pointColMap.getGlobalElement(pointColInds[k]) / blockSize;
350  meshColGids.push_back(meshColInd);
351  }
352  }
353  //List of mesh GIDs probably contains duplicates because we looped over all point rows in the block.
354  //Sort and make unique.
355  std::sort(meshColGids.begin(), meshColGids.end());
356  meshColGids.erase( std::unique(meshColGids.begin(), meshColGids.end()), meshColGids.end() );
357  meshCrsGraph->insertGlobalIndices(meshRowMap->getGlobalElement(i), meshColGids());
358  meshColGids.clear();
359  }
360  meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);
361 
362  //create and populate the block matrix
363  RCP<block_crs_matrix_type> blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockSize));
364 
365  //preallocate the maximum number of (dense) block entries needed by any row
366  int maxBlockEntries = blockMatrix->getNodeMaxNumRowEntries();
367  Array<Array<Scalar>> blocks(maxBlockEntries);
368  for (int i=0; i<maxBlockEntries; ++i)
369  blocks[i].reserve(blockSize*blockSize);
370  std::map<int,int> bcol2bentry; //maps block column index to dense block entries
371  std::map<int,int>::iterator iter;
372  //Fill the block matrix. We must do this in local index space.
373  //TODO: Optimization: We assume the blocks are fully populated in the point matrix. This means
374  //TODO: on the first point row in the block row, we know that we're hitting new block col indices.
375  //TODO: on other rows, we know the block col indices have all been seen before
376  //int offset;
377  //if (pointMatrix.getIndexBase()) offset = 0;
378  //else offset = 1;
379  for (size_t i=0; i<pointMatrix.getNodeNumRows()/blockSize; i++) {
380  int blkCnt=0; //how many unique block entries encountered so far in current block row
381  for (int j=0; j<blockSize; ++j) {
382  LO rowLid = i*blockSize+j;
383  pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals);
384  for (size_t k=0; k<pointColInds.size(); ++k) {
385  //convert point column to block col
386  LO meshColInd = pointColInds[k] / blockSize;
387  iter = bcol2bentry.find(meshColInd);
388  if (iter == bcol2bentry.end()) {
389  //new block column
390  bcol2bentry[meshColInd] = blkCnt;
391  blocks[blkCnt].push_back(pointVals[k]);
392  blkCnt++;
393  } else {
394  //block column found previously
395  int littleBlock = iter->second;
396  blocks[littleBlock].push_back(pointVals[k]);
397  }
398  }
399  }
400  // TODO This inserts the blocks one block entry at a time. It is probably more efficient to
401  // TODO store all the blocks in a block row contiguously so they can be inserted with a single call.
402  for (iter=bcol2bentry.begin(); iter != bcol2bentry.end(); ++iter) {
403  LO localBlockCol = iter->first;
404  Scalar *vals = (blocks[iter->second]).getRawPtr();
405  blockMatrix->replaceLocalValues(i, &localBlockCol, vals, 1);
406  }
407 
408  //Done with block row. Zero everything out.
409  for (int j=0; j<maxBlockEntries; ++j)
410  blocks[j].clear();
411  blkCnt = 0;
412  bcol2bentry.clear();
413  }
414 
415  return blockMatrix;
416 
417  }
418 
419  template<class LO, class GO, class Node>
420  Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
421  createMeshMap (const LO& blockSize, const Tpetra::Map<LO,GO,Node>& pointMap)
422  {
423  typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
424  typedef Tpetra::Map<LO,GO,Node> map_type;
425 
426  //calculate mesh GIDs
427  Teuchos::ArrayView<const GO> pointGids = pointMap.getNodeElementList();
428  Teuchos::Array<GO> meshGids;
429  GO indexBase = pointMap.getIndexBase();
430 
431  // Use hash table to track whether we've encountered this GID previously. This will happen
432  // when striding through the point DOFs in a block. It should not happen otherwise.
433  // I don't use sort/make unique because I don't want to change the ordering.
434  meshGids.reserve(pointGids.size());
435  Tpetra::Details::HashTable<GO,int> hashTable(pointGids.size());
436  for (int i=0; i<pointGids.size(); ++i) {
437  GO meshGid = (pointGids[i]-indexBase) / blockSize + indexBase;
438  if (hashTable.get(meshGid) == -1) {
439  hashTable.add(meshGid,1); //(key,value)
440  meshGids.push_back(meshGid);
441  }
442  }
443 
444  Teuchos::RCP<const map_type> meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGids(), 0, pointMap.getComm()) );
445  return meshMap;
446 
447  }
448 
449 
450  template<class LO, class GO, class Node>
451  Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
452  createPointMap (const LO& blockSize, const Tpetra::Map<LO,GO,Node>& blockMap)
453  {
454  typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
455  typedef Tpetra::Map<LO,GO,Node> map_type;
456 
457  //calculate mesh GIDs
458  Teuchos::ArrayView<const GO> blockGids = blockMap.getNodeElementList();
459  Teuchos::Array<GO> pointGids(blockGids.size() * blockSize);
460  GO indexBase = blockMap.getIndexBase();
461 
462  for(LO i=0, ct=0; i<(LO)blockGids.size(); i++) {
463  GO base = (blockGids[i] - indexBase)* blockSize + indexBase;
464  for(LO j=0; j<blockSize; j++, ct++)
465  pointGids[i*blockSize+j] = base+j;
466  }
467 
468  Teuchos::RCP<const map_type> pointMap = Teuchos::rcp( new map_type(TOT::invalid(), pointGids(), 0, blockMap.getComm()) );
469  return pointMap;
470 
471  }
472 
473 
474  template<class Scalar, class LO, class GO, class Node>
475  Teuchos::RCP<CrsMatrix<Scalar, LO, GO, Node> >
477 
478  using Teuchos::Array;
479  using Teuchos::ArrayView;
480  using Teuchos::RCP;
481 
482  typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
483  typedef Tpetra::Map<LO,GO,Node> map_type;
484  typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
485 
486  using crs_graph_type = typename block_crs_matrix_type::crs_graph_type;
487  using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
488  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
489  using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
490  using entries_type = typename local_graph_device_type::entries_type::non_const_type;
491  using values_type = typename local_matrix_device_type::values_type::non_const_type;
492 
493  using row_map_type_const = typename local_graph_device_type::row_map_type;
494  using entries_type_const = typename local_graph_device_type::entries_type;
495 
496  using little_block_type = typename block_crs_matrix_type::const_little_block_type;
497  using offset_type = typename row_map_type::non_const_value_type;
498  using column_type = typename entries_type::non_const_value_type;
499 
500  using execution_space = typename Node::execution_space;
501  using range_type = Kokkos::RangePolicy<execution_space, LO>;
502 
503 
504  LO blocksize = blockMatrix.getBlockSize();
505  const offset_type bs2 = blocksize * blocksize;
506  size_t block_nnz = blockMatrix.getNodeNumEntries();
507  size_t point_nnz = block_nnz * bs2;
508 
509  // We can get these from the blockMatrix directly
510  RCP<const map_type> pointDomainMap = blockMatrix.getDomainMap();
511  RCP<const map_type> pointRangeMap = blockMatrix.getRangeMap();
512 
513  // We need to generate the row/col Map ourselves.
514  RCP<const map_type> blockRowMap = blockMatrix.getRowMap();
515  RCP<const map_type> pointRowMap = createPointMap<LO,GO,Node>(blocksize, *blockRowMap);
516 
517  RCP<const map_type> blockColMap = blockMatrix.getColMap();
518  RCP<const map_type> pointColMap = createPointMap<LO,GO,Node>(blocksize, *blockColMap);
519 
520  // Get the last few things
521 
522  const crs_graph_type & blockGraph = blockMatrix.getCrsGraph();
523  LO point_rows = (LO) pointRowMap->getNodeNumElements();
524  LO block_rows = (LO) blockRowMap->getNodeNumElements();
525  auto blockValues = blockMatrix.getValuesDevice();
526  auto blockLocalGraph = blockGraph.getLocalGraphDevice();
527  row_map_type_const blockRowptr = blockLocalGraph.row_map;
528  entries_type_const blockColind = blockLocalGraph.entries;
529 
530  // Generate the point matrix rowptr / colind / values
531  row_map_type rowptr("row_map", point_rows+1);
532  entries_type colind("entries", point_nnz);
533  values_type values("values", point_nnz);
534 
535  // Pre-fill the rowptr
536  Kokkos::parallel_for("fillRowPtr",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
537  offset_type base = blockRowptr[i];
538  offset_type diff = blockRowptr[i+1] - base;
539  for(LO j=0; j<blocksize; j++) {
540  rowptr[i*blocksize +j] = base*bs2 + j*diff*blocksize;
541  }
542 
543  // Fill the last guy, if we're on the final entry
544  if(i==block_rows-1) {
545  rowptr[block_rows*blocksize] = blockRowptr[i+1] * bs2;
546  }
547  });
548 
549 
550  Kokkos::parallel_for("copyEntriesAndValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
551  const offset_type blkBeg = blockRowptr[i];
552  const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
553 
554  // For each block in the row...
555  for (offset_type block=0; block < numBlocks; block++) {
556  column_type point_col_base = blockColind[blkBeg + block] * blocksize;
557  little_block_type my_block(blockValues.data () + (blkBeg+block) * bs2, blocksize, blocksize);
558 
559  // For each entry in the block...
560  for(LO little_row=0; little_row<blocksize; little_row++) {
561  offset_type point_row_offset = rowptr[i*blocksize + little_row];
562  for(LO little_col=0; little_col<blocksize; little_col++) {
563  colind[point_row_offset + block*blocksize + little_col] = point_col_base + little_col;
564  values[point_row_offset + block*blocksize + little_col] = my_block(little_row,little_col);
565  }
566  }
567 
568  }
569  });
570 
571  // Generate the matrix
572  RCP<crs_matrix_type> pointCrsMatrix = rcp(new crs_matrix_type(pointRowMap, pointColMap, 0));
573  pointCrsMatrix->setAllValues(rowptr,colind,values);
574 
575  // FUTURE OPTIMIZATION: Directly compute import/export, rather than letting ESFC do it
576  pointCrsMatrix->expertStaticFillComplete(pointDomainMap,pointRangeMap);
577 
578  return pointCrsMatrix;
579  }
580 
581 
582 } // namespace Tpetra
583 
584 //
585 // Explicit instantiation macro for blockCrsMatrixWriter (various
586 // overloads), writeMatrixStrip, and convertToBlockCrsMatrix.
587 //
588 // Must be expanded from within the Tpetra namespace!
589 //
590 #define TPETRA_BLOCKCRSMATRIX_HELPERS_INSTANT(S,LO,GO,NODE) \
591  template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName); \
592  template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName, Teuchos::ParameterList const &params); \
593  template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os); \
594  template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
595  template void writeMatrixStrip(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
596  template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
597  template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > convertToCrsMatrix(const BlockCrsMatrix<S, LO, GO, NODE>& blockMatrix);
598 
599 //
600 // Explicit instantiation macro for createMeshMap / createPointMap
601 //
602 #define TPETRA_CREATEMESHMAP_INSTANT(LO,GO,NODE) \
603  template Teuchos::RCP<const Map<LO,GO,NODE> > createMeshMap (const LO& blockSize, const Map<LO,GO,NODE>& pointMap); \
604  template Teuchos::RCP<const Map<LO,GO,NODE> > createPointMap (const LO& blockSize, const Map<LO,GO,NODE>& blockMap);
605 
606 #endif // TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
virtual size_t getNodeNumEntries() const override
The local number of stored (structurally nonzero) entries.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
size_t getNodeNumRows() const override
get the local number of block rows
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
global_size_t getGlobalNumRows() const override
get the global number of block rows
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
virtual size_t getNodeNumCols() const override
The number of columns needed to apply the forward operator on this node.
LO getBlockSize() const
The number of degrees of freedom per mesh point.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
ValueType get(const KeyType key)
Get the value corresponding to the given key.
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::ArrayView< const global_ordinal_type > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
global_ordinal_type getIndexBase() const
The index base for this Map.
One or more distributed dense vectors.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void writeMatrixStrip(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::ostream &os, Teuchos::ParameterList const &params)
Helper function called by blockCrsMatrixWriter.
Teuchos::RCP< CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > createCrsGraph(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t maxNumEntriesPerRow=0, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Nonmember function to create an empty CrsGraph given a row Map and the max number of entries allowed ...
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createPointMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &blockMap)
Helper function to generate a point map from a block map (with a given block size) GIDs associated wi...
Teuchos::RCP< CrsMatrix< Scalar, LO, GO, Node > > convertToCrsMatrix(const Tpetra::BlockCrsMatrix< Scalar, LO, GO, Node > &blockMatrix)
Non-member constructor that creates a point CrsMatrix from an existing BlockCrsMatrix.
Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > convertToBlockCrsMatrix(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize)
Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix.
void blockCrsMatrixWriter(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::string const &fileName)
Helper function to write a BlockCrsMatrix. Calls the 3-argument version.
@ INSERT
Insert new values that don't currently exist.
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createMeshMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &pointMap)
Helper function to generate a mesh map from a point map. Important! It's assumed that point GIDs asso...