Tpetra parallel linear algebra  Version of the Day
MatrixMarket_TpetraNew.hpp
1 
2 #ifndef __MATRIXMARKET_TPETRA_NEW_HPP
3 #define __MATRIXMARKET_TPETRA_NEW_HPP
4 
6 // Functions to read a MatrixMarket file and load it into a matrix.
7 // Adapted from
8 // Trilinos/packages/epetraext/src/inout/EpetraExt_CrsMatrixIn.cpp
9 // Modified by Jon Berry and Karen Devine to make matrix reallocations
10 // more efficient; rather than insert each non-zero separately, we
11 // collect rows of non-zeros for insertion.
12 // Modified by Karen Devine and Steve Plimpton to prevent all processors
13 // from reading the same file at the same time (ouch!); chunks of the file
14 // are read and broadcast by processor zero; each processor then extracts
15 // its nonzeros from the chunk, sorts them by row, and inserts them into
16 // the matrix.
17 // The variable "chunkSize" can be changed to modify the size of the chunks
18 // read from the file. Larger values of chunkSize lead to faster execution
19 // and greater memory use.
21 
22 private:
23 
24 template <typename global_ordinal_type, typename scalar_type>
25 static
26 Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> >
27 buildDistribution(
28  std::string &distribution, // string indicating whether to use 1D, 2D or
29  // file-based matrix distribution
30  size_t nRow, // Number of global matrix rows
31  size_t nCol, // Number of global matrix rows
32  const Teuchos::ParameterList &params, // Parameters to the file reading
33  const Teuchos::RCP<const Teuchos::Comm<int> > &comm
34  // communicator to be used in maps
35 )
36 {
37  // Function to build the sets of GIDs for 1D or 2D matrix distribution.
38  // Output is a Distribution object.
39 
40  int me = comm->getRank();
41 
42  using basedist_t = Distribution<global_ordinal_type,scalar_type>;
43  basedist_t *retval;
44 
45  bool randomize = false; // Randomly permute rows and columns before storing
46  {
47  const Teuchos::ParameterEntry *pe = params.getEntryPtr("randomize");
48  if (pe != NULL)
49  randomize = pe->getValue<bool>(&randomize);
50  }
51 
52  std::string partitionFile = ""; // File to reassign rows to parts
53  {
54  const Teuchos::ParameterEntry *pe = params.getEntryPtr("partitionFile");
55  if (pe != NULL)
56  partitionFile = pe->getValue<std::string>(&partitionFile);
57  }
58 
59  if (distribution == "2D") { // Generate 2D distribution
60  if (partitionFile != "") {
61  // Generate 2D distribution with vector partition file
62  TEUCHOS_TEST_FOR_EXCEPTION(randomize, std::logic_error,
63  "Randomization not available for 2DVec distributions.");
64  Distribution2DVec<global_ordinal_type,scalar_type> *dist =
65  new Distribution2DVec<global_ordinal_type, scalar_type>(
66  nRow, comm, params, partitionFile);
67  retval = dynamic_cast<basedist_t *>(dist);
68  }
69  else if (randomize) {
70  // Randomly assign rows/columns to processors
71  Distribution2DRandom<global_ordinal_type,scalar_type> *dist =
72  new Distribution2DRandom<global_ordinal_type,scalar_type>(
73  nRow, comm, params);
74  retval = dynamic_cast<basedist_t *>(dist);
75  }
76  else {
77  // The vector will be distributed linearly, with extra vector entries
78  // spread among processors to maintain balance in rows and columns.
79  Distribution2DLinear<global_ordinal_type,scalar_type> *dist =
80  new Distribution2DLinear<global_ordinal_type,scalar_type>(
81  nRow, comm, params);
82  retval = dynamic_cast<basedist_t *>(dist);
83  }
84  }
85  else if (distribution == "1D") {
86  if (partitionFile != "") {
87  // Generate 1D row-based distribution with vector partition file
88  Distribution1DVec<global_ordinal_type,scalar_type> *dist =
89  new Distribution1DVec<global_ordinal_type,scalar_type>(
90  nRow, comm, params, partitionFile);
91  retval = dynamic_cast<basedist_t*>(dist);
92  }
93  else if (randomize) {
94  // Randomly assign rows to processors.
95  Distribution1DRandom<global_ordinal_type,scalar_type> *dist =
96  new Distribution1DRandom<global_ordinal_type,scalar_type>(
97  nRow, comm, params);
98  retval = dynamic_cast<basedist_t *>(dist);
99  }
100  else {
101  // Linear map similar to Trilinos default.
102  Distribution1DLinear<global_ordinal_type,scalar_type> *dist =
103  new Distribution1DLinear<global_ordinal_type,scalar_type>(
104  nRow, comm, params);
105  retval = dynamic_cast<basedist_t *>(dist);
106  }
107  }
108  else if (distribution == "LowerTriangularBlock") {
109  if (randomize && me == 0) {
110  std::cout << "WARNING: Randomization not available for "
111  << "LowerTriangularBlock distributions." << std::endl;
112  }
113 
114  DistributionLowerTriangularBlock<global_ordinal_type,scalar_type> *dist =
115  new DistributionLowerTriangularBlock<global_ordinal_type,scalar_type>(
116  nRow, comm, params);
117  retval = dynamic_cast<basedist_t*>(dist);
118  }
119  else if (distribution == "MMFile") {
120  // Generate user-defined distribution from Matrix-Market file
121  if (randomize && me == 0) {
122  std::cout << "WARNING: Randomization not available for MMFile "
123  << "distributions." << std::endl;
124  }
125  DistributionMMFile<global_ordinal_type,scalar_type> *dist =
126  new DistributionMMFile<global_ordinal_type,scalar_type>(
127  nRow, comm, params);
128  retval = dynamic_cast<basedist_t*>(dist);
129  }
130 
131  else {
132  std::cout << "ERROR: Invalid distribution method " << distribution
133  << std::endl;
134  exit(-1);
135  }
136  return Teuchos::rcp<basedist_t>(retval);
137 }
138 
139 static
140 void
141 readMatrixMarket(
142  const std::string &filename, // MatrixMarket file to read
143  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
144  const Teuchos::ParameterList &params,
145  size_t &nRow,
146  size_t &nCol,
147  typename Distribution<global_ordinal_type,scalar_type>::LocalNZmap_t &localNZ,
148  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > &dist
149 )
150 {
151  bool useTimers = false; // should we time various parts of the reader?
152  {
153  const Teuchos::ParameterEntry *pe = params.getEntryPtr("useTimers");
154  if (pe != NULL)
155  useTimers = pe->getValue<bool>(&useTimers);
156  }
157 
158  Teuchos::RCP<Teuchos::TimeMonitor> timer = Teuchos::null;
159  if (useTimers)
160  timer = rcp(new Teuchos::TimeMonitor(
161  *Teuchos::TimeMonitor::getNewTimer("RMM parameterSetup")));
162 
163  int me = comm->getRank();
164 
165  bool verbose = false; // Print status as reading
166  {
167  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
168  if (pe != NULL)
169  verbose = pe->getValue<bool>(&verbose);
170  }
171 
172  size_t chunkSize = 500000; // Number of lines to read / broadcast at once
173  {
174  const Teuchos::ParameterEntry *pe = params.getEntryPtr("chunkSize");
175  if (pe != NULL)
176  chunkSize = pe->getValue<size_t>(&chunkSize);
177  }
178 
179  bool symmetrize = false; // Symmetrize the matrix
180  {
181  const Teuchos::ParameterEntry *pe = params.getEntryPtr("symmetrize");
182  if (pe != NULL)
183  symmetrize = pe->getValue<bool>(&symmetrize);
184  }
185 
186  bool transpose = false; // Store the transpose
187  {
188  const Teuchos::ParameterEntry *pe = params.getEntryPtr("transpose");
189  if (pe != NULL)
190  transpose = pe->getValue<bool>(&transpose);
191  }
192 
193  std::string diagonal = ""; // Are diagonal entries required (so add them)
194  // or ignored (so delete them) or neither?
195  // Default is neither.
196  {
197  const Teuchos::ParameterEntry *pe = params.getEntryPtr("diagonal");
198  if (pe != NULL)
199  diagonal = pe->getValue<std::string>(&diagonal);
200  }
201  bool ignoreDiagonal = (diagonal == "exclude");
202  bool requireDiagonal = (diagonal == "require");
203 
204  std::string distribution = "1D"; // Default distribution is 1D row-based
205  {
206  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
207  if (pe != NULL)
208  distribution = pe->getValue<std::string>(&distribution);
209  }
210 
211  if (useTimers) {
212  timer = Teuchos::null;
213  timer = rcp(new Teuchos::TimeMonitor(
214  *Teuchos::TimeMonitor::getNewTimer("RMM header")));
215  }
216 
217  if (verbose && me == 0)
218  std::cout << "Reading matrix market file... " << filename << std::endl;
219 
220  FILE *fp = NULL;
221  size_t dim[3] = {0,0,0}; // nRow, nCol, nNz as read from MatrixMarket
222  MM_typecode mmcode;
223 
224  if (me == 0) {
225 
226  fp = fopen(filename.c_str(), "r");
227 
228  if (fp == NULL) {
229  std::cout << "Error: cannot open file " << filename << std::endl;
230  }
231  else {
232  // Read MatrixMarket banner to get type of matrix
233  if (mm_read_banner(fp, &mmcode) != 0) {
234  std::cout << "Error: bad MatrixMarket banner " << std::endl;
235  }
236  else {
237  // Error check for file types that this function can read
238  if (!mm_is_valid(mmcode) ||
239  mm_is_dense(mmcode) || mm_is_array(mmcode) ||
240  mm_is_complex(mmcode) ||
241  mm_is_skew(mmcode) || mm_is_hermitian(mmcode)) {
242  std::cout << "Error: matrix type is not supported by this reader\n "
243  << "This reader supports sparse, coordinate, "
244  << "real, pattern, integer, symmetric, general"
245  << std::endl;
246  }
247  else {
248  // Read nRow, nCol, nNz from MatrixMarket file
249  if (mm_read_mtx_crd_size(fp, &dim[0], &dim[1], &dim[2]) != 0) {
250  std::cout << "Error: bad matrix dimensions " << std::endl;
251  dim[0] = dim[1] = dim[2] = 0;
252  }
253  }
254  }
255  }
256  }
257 
258  // Broadcast relevant info
259  // Bad input if dim[0] or dim[1] still is zero after broadcast;
260  // all procs throw together
261  Teuchos::broadcast<int, size_t>(*comm, 0, 3, dim);
262  if (dim[0] == 0 || dim[1] == 0) {
263  throw std::runtime_error("Error: bad matrix header information");
264  }
265  Teuchos::broadcast<int, char>(*comm, 0, sizeof(MM_typecode), mmcode);
266 
267  nRow = dim[0];
268  nCol = dim[1];
269 
270  TEUCHOS_TEST_FOR_EXCEPTION(nRow != nCol, std::logic_error,
271  "This overload of readSparseFile requires nRow == nCol "
272  << "(nRow = " << nRow << ", nCol = " << nCol << "); "
273  << "For now, use a different overload of readSparseFile until this "
274  << "overload is fixed to read rectangular matrices. "
275  << "See Trilinos github issues #7045 and #8472.");
276 
277  size_t nNz = dim[2];
278  bool patternInput = mm_is_pattern(mmcode);
279  bool symmetricInput = mm_is_symmetric(mmcode);
280  if (symmetricInput) symmetrize = true;
281 
282  if (verbose && me == 0)
283  std::cout << "Matrix market file... "
284  << "\n symmetrize = " << symmetrize
285  << "\n transpose = " << transpose
286  << "\n change diagonal = " << diagonal
287  << "\n distribution = " << distribution
288  << std::endl;
289 
290  if (useTimers) {
291  timer = Teuchos::null;
292  timer = rcp(new Teuchos::TimeMonitor(
293  *Teuchos::TimeMonitor::getNewTimer("RMM distribution")));
294  }
295 
296  // Create distribution based on nRow, nCol, npRow, npCol
297  dist = buildDistribution<global_ordinal_type,scalar_type>(distribution,
298  nRow, nCol, params,
299  comm);
300  if (useTimers) {
301  timer = Teuchos::null;
302  timer = rcp(new Teuchos::TimeMonitor(
303  *Teuchos::TimeMonitor::getNewTimer("RMM readChunks")));
304  }
305 
306  std::set<global_ordinal_type> diagset;
307  // If diagonal == require, this set keeps track of
308  // which diagonal entries already existing so we can
309  // add those that don't
310 
311  using nzindex_t =
312  typename Distribution<global_ordinal_type,scalar_type>::NZindex_t;
313 
314  // Chunk information and buffers
315  const int maxLineLength = 81;
316  char *buffer = new char[chunkSize*maxLineLength+1];
317  size_t nChunk;
318  size_t nMillion = 0;
319  size_t nRead = 0;
320  size_t rlen;
321 
322  auto timerRead = Teuchos::TimeMonitor::getNewTimer("RMM readNonzeros");
323  auto timerSelect = Teuchos::TimeMonitor::getNewTimer("RMM selectNonzeros");
324  // Read chunks until the entire file is read
325  Teuchos::RCP<Teuchos::TimeMonitor> innerTimer = Teuchos::null;
326  while (nRead < nNz) {
327 
328  innerTimer = rcp(new Teuchos::TimeMonitor(*timerRead));
329 
330  if (nNz-nRead > chunkSize) nChunk = chunkSize;
331  else nChunk = (nNz - nRead);
332 
333  // Processor 0 reads a chunk
334  if (me == 0) {
335  char *eof;
336  rlen = 0;
337  for (size_t i = 0; i < nChunk; i++) {
338  eof = fgets(&buffer[rlen],maxLineLength,fp);
339  if (eof == NULL) {
340  std::cout << "Unexpected end of matrix file." << std::endl;
341  std::cout.flush();
342  exit(-1);
343  }
344  rlen += strlen(&buffer[rlen]);
345  }
346  buffer[rlen++]='\n';
347  }
348 
349  // Processor 0 broadcasts the chunk
350  Teuchos::broadcast<int, size_t>(*comm, 0, 1, &rlen);
351  Teuchos::broadcast<int, char>(*comm, 0, rlen, buffer);
352 
353  buffer[rlen++] = '\0';
354  nRead += nChunk;
355 
356  innerTimer = Teuchos::null;
357  innerTimer = rcp(new Teuchos::TimeMonitor(*timerSelect));
358 
359  // All processors check the received data, saving nonzeros belonging to them
360  char *lineptr = buffer;
361  for (rlen = 0; rlen < nChunk; rlen++) {
362 
363  char *next = strchr(lineptr,'\n');
364  global_ordinal_type I = atol(strtok(lineptr," \t\n"))
365  - 1; // since MatrixMarket files are one-based
366  global_ordinal_type J = atol(strtok(NULL," \t\n"))
367  - 1; // since MatrixMarket files are one-based
368 
369  scalar_type V = (patternInput ? -1. : atof(strtok(NULL," \t\n")));
370  lineptr = next + 1;
371 
372  // Special processing of nonzero
373  if ((I == J) && ignoreDiagonal) continue;
374 
375  if (transpose) std::swap<global_ordinal_type>(I,J);
376 
377  // Add nonzero (I,J) to the map if it should be on this processor
378  // Some file-based distributions have processor assignment stored as
379  // the non-zero's value, so pass the value to Mine.
380  if (dist->Mine(I,J,int(V))) {
381  nzindex_t idx = std::make_pair(I,J);
382  localNZ[idx] = V;
383  if (requireDiagonal && (I == J)) diagset.insert(I);
384  }
385 
386  // If symmetrizing, add (J,I) to the map if it should be on this processor
387  // Some file-based distributions have processor assignment stored as
388  // the non-zero's value, so pass the value to Mine.
389  if (symmetrize && (I != J) && dist->Mine(J,I,int(V))) {
390  // Add entry (J, I) if need to symmetrize
391  // This processor keeps this non-zero.
392  nzindex_t idx = std::make_pair(J,I);
393  localNZ[idx] = V;
394  }
395  }
396 
397  // Status check
398  if (verbose) {
399  if (nRead / 1000000 > nMillion) {
400  nMillion++;
401  if (me == 0) std::cout << nMillion << "M ";
402  }
403  }
404 
405  innerTimer = Teuchos::null;
406  }
407 
408  if (verbose && me == 0)
409  std::cout << std::endl << nRead << " nonzeros read " << std::endl;
410 
411  if (fp != NULL) fclose(fp);
412  delete [] buffer;
413 
414  if (useTimers) {
415  timer = Teuchos::null;
416  timer = rcp(new Teuchos::TimeMonitor(
417  *Teuchos::TimeMonitor::getNewTimer("RMM diagonal")));
418  }
419 
420  // Add diagonal entries if they are required.
421  // Check to make sure they are all here; add them if they are not.
422  if (requireDiagonal) {
423  if (dist->DistType() == MMFile) {
424  // All diagonal entries should be present in the file; we cannot create
425  // them for file-based data distributions.
426  // Do an error check to ensure they are all there.
427  size_t localss = diagset.size();
428  size_t globalss;
429  Teuchos::reduceAll<int, size_t>(*comm, Teuchos::REDUCE_SUM, 1,
430  &localss, &globalss);
431  TEUCHOS_TEST_FOR_EXCEPTION(globalss != nRow, std::logic_error,
432  "File-based nonzero distribution requires all diagonal "
433  << "entries to be present in the file. \n"
434  << "Number of diagonal entries in file = " << globalss << "\n"
435  << "Number of matrix rows = " << nRow << "\n");
436  }
437  else {
438  for (global_ordinal_type i = 0;
439  i < static_cast<global_ordinal_type>(nRow); i++)
440  {
441  if (dist->Mine(i,i)) {
442  if (diagset.find(i) == diagset.end()) {
443  nzindex_t idx = std::make_pair(i,i);
444  localNZ[idx] = 0;
445  }
446  }
447  }
448  }
449  }
450  // Done with diagset; free its memory
451  std::set<global_ordinal_type>().swap(diagset);
452 
453  if (useTimers)
454  timer = Teuchos::null;
455 }
456 
459 // (This format is used by the upcoming readBinary function) //
461 //
462 // File format:
463 // #vertices #edges src_1 dst_1 src_2 dst_2 ... src_#edges dst_#edges
464 //
465 // Types:
466 // #edges: unsigned long long
467 // everything else: unsigned int
468 //
469 // Base of indexing:
470 // 1
471 //
473 //
474 // A code example that converts MatrixMarket format into this format:
475 //
476 // typedef unsigned int ord_type;
477 // typedef unsigned long long size_type;
478 //
479 // std::string line;
480 // std::ifstream in(infilename);
481 // std::ofstream out(outfilename, std::ios::out | std::ios::binary);
482 //
483 // ord_type nv, dummy, edge[2];
484 // size_type ne;
485 //
486 // do
487 // std::getline (in, line);
488 // while(line[0] == '%');
489 //
490 // std::stringstream ss(line);
491 // ss >> nv >> dummy >> ne;
492 // out.write((char *)&nv, sizeof(ord_type));
493 // out.write((char *)&ne, sizeof(size_type));
494 //
495 // while(std::getline(in, line)) {
496 // std::stringstream ss2(line);
497 // ss2 >> edge[0] >> edge[1];
498 // out.write((char *)edge, sizeof(ord_type)*2);
499 //
500 // }
501 // in.close();
502 // out.close();
503 //
505 
506 static
507 void
508 readBinary(
509  const std::string &filename, // MatrixMarket file to read
510  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
511  const Teuchos::ParameterList &params,
512  size_t &nRow,
513  size_t &nCol,
514  typename Distribution<global_ordinal_type,scalar_type>::LocalNZmap_t &localNZ,
515  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > &dist
516 )
517 {
518 
519  int me = comm->getRank();
520 
521  bool verbose = false; // Print status as reading
522  {
523  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
524  if (pe != NULL)
525  verbose = pe->getValue<bool>(&verbose);
526  }
527 
528  size_t chunkSize = 500000; // Number of lines to read / broadcast at once
529  {
530  const Teuchos::ParameterEntry *pe = params.getEntryPtr("chunkSize");
531  if (pe != NULL)
532  chunkSize = pe->getValue<size_t>(&chunkSize);
533  }
534 
535  bool symmetrize = false; // Symmetrize the matrix
536  {
537  const Teuchos::ParameterEntry *pe = params.getEntryPtr("symmetrize");
538  if (pe != NULL)
539  symmetrize = pe->getValue<bool>(&symmetrize);
540  }
541 
542  bool transpose = false; // Store the transpose
543  {
544  const Teuchos::ParameterEntry *pe = params.getEntryPtr("transpose");
545  if (pe != NULL)
546  transpose = pe->getValue<bool>(&transpose);
547  }
548 
549  std::string diagonal = ""; // Are diagonal entries required (so add them)
550  // or ignored (so delete them) or neither?
551  // Default is neither.
552  {
553  const Teuchos::ParameterEntry *pe = params.getEntryPtr("diagonal");
554  if (pe != NULL)
555  diagonal = pe->getValue<std::string>(&diagonal);
556  }
557  bool ignoreDiagonal = (diagonal == "exclude");
558  bool requireDiagonal = (diagonal == "require");
559 
560  std::string distribution = "1D"; // Default distribution is 1D row-based
561  {
562  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
563  if (pe != NULL)
564  distribution = pe->getValue<std::string>(&distribution);
565  }
566 
567  if (verbose && me == 0)
568  std::cout << "Reading binary file... " << filename << std::endl;
569 
570  FILE *fp = NULL;
571  size_t dim[3] = {0,0,0}; // Expected to read nRow and nNz, nCol = nRow
572 
573  if (me == 0) {
574 
575  fp = fopen(filename.c_str(), "rb");
576 
577  if (fp == NULL) {
578  std::cout << "Error: cannot open file " << filename << std::endl;
579  }
580  else {
581  // The header in the binary file contains only numVertices and numEdges
582  unsigned int nv = 0;
583  unsigned long long ne = 0;
584  fread(&nv, sizeof(unsigned int), 1, fp);
585  fread(&ne, sizeof(unsigned long long), 1, fp);
586  dim[0] = nv;
587  dim[1] = dim[0];
588  dim[2] = ne;
589  }
590  }
591 
592  // Broadcast relevant info
593  // Bad input if dim[0] or dim[1] still is zero after broadcast;
594  // all procs throw together
595  Teuchos::broadcast<int, size_t>(*comm, 0, 3, dim);
596  if (dim[0] == 0 || dim[1] == 0) {
597  throw std::runtime_error("Error: bad matrix header information");
598  }
599 
600  nRow = dim[0];
601  nCol = dim[1];
602  size_t nNz = dim[2];
603 
604  if (verbose && me == 0)
605  std::cout << "Binary file... "
606  << "\n symmetrize = " << symmetrize
607  << "\n transpose = " << transpose
608  << "\n change diagonal = " << diagonal
609  << "\n distribution = " << distribution
610  << std::endl;
611 
612  // Create distribution based on nRow, nCol, npRow, npCol
613  dist = buildDistribution<global_ordinal_type,scalar_type>(distribution,
614  nRow, nCol, params,
615  comm);
616 
617  std::set<global_ordinal_type> diagset;
618  // If diagonal == require, this set keeps track of
619  // which diagonal entries already existing so we can
620  // add those that don't
621 
622  using nzindex_t =
623  typename Distribution<global_ordinal_type,scalar_type>::NZindex_t;
624 
625  // Chunk information and buffers
626  unsigned int *buffer = new unsigned int[chunkSize*2];
627  size_t nChunk;
628  size_t nMillion = 0;
629  size_t nRead = 0;
630  size_t rlen;
631  const scalar_type ONE = Teuchos::ScalarTraits<scalar_type>::one();
632 
633 
634  // Read chunks until the entire file is read
635  while (nRead < nNz) {
636  if (nNz-nRead > chunkSize) nChunk = chunkSize;
637  else nChunk = (nNz - nRead);
638 
639  // Processor 0 reads a chunk
640  if (me == 0) {
641  size_t ret = 0;
642  rlen = 0;
643  for (size_t i = 0; i < nChunk; i++) {
644  ret = fread(&buffer[rlen], sizeof(unsigned int), 2, fp);
645  if (ret == 0) {
646  std::cout << "Unexpected end of matrix file." << std::endl;
647  std::cout.flush();
648  exit(-1);
649  }
650  rlen += 2;
651  }
652  }
653 
654  // Processor 0 broadcasts the chunk
655  Teuchos::broadcast<int, size_t>(*comm, 0, 1, &rlen);
656  Teuchos::broadcast<int, unsigned int>(*comm, 0, rlen, buffer);
657 
658  nRead += nChunk;
659 
660  // All processors check the received data, saving nonzeros belonging to them
661  for (rlen = 0; rlen < nChunk; rlen++) {
662 
663  global_ordinal_type I = buffer[2*rlen]-1;
664  global_ordinal_type J = buffer[2*rlen+1]-1;
665 
666  // Special processing of nonzero
667  if ((I == J) && ignoreDiagonal) continue;
668 
669  if (transpose) std::swap<global_ordinal_type>(I,J);
670 
671  // Add nonzero (I,J) to the map if it should be on this processor
672  // Some file-based distributions have processor assignment stored as
673  // the non-zero's value, so pass the value to Mine.
674  if (dist->Mine(I,J,ONE)) {
675  nzindex_t idx = std::make_pair(I,J);
676  localNZ[idx] = ONE; // For now, the input binary format does not
677  // support numeric values, so we insert one.
678  if (requireDiagonal && (I == J)) diagset.insert(I);
679  }
680 
681  // If symmetrizing, add (J,I) to the map if it should be on this processor
682  // Some file-based distributions have processor assignment stored as
683  // the non-zero's value, so pass the value to Mine.
684  if (symmetrize && (I != J) && dist->Mine(J,I,ONE)) {
685  // Add entry (J, I) if need to symmetrize
686  // This processor keeps this non-zero.
687  nzindex_t idx = std::make_pair(J,I);
688  localNZ[idx] = ONE;
689  }
690  }
691 
692  // Status check
693  if (verbose) {
694  if (nRead / 1000000 > nMillion) {
695  nMillion++;
696  if (me == 0) std::cout << nMillion << "M ";
697  }
698  }
699  }
700 
701  if (verbose && me == 0)
702  std::cout << std::endl << nRead << " nonzeros read " << std::endl;
703 
704  if (fp != NULL) fclose(fp);
705  delete [] buffer;
706 
707  // Add diagonal entries if they are required.
708  // Check to make sure they are all here; add them if they are not.
709  if (requireDiagonal) {
710  if (dist->DistType() == MMFile) {
711  // All diagonal entries should be present in the file; we cannot create
712  // them for file-based data distributions.
713  // Do an error check to ensure they are all there.
714  size_t localss = diagset.size();
715  size_t globalss;
716  Teuchos::reduceAll<int, size_t>(*comm, Teuchos::REDUCE_SUM, 1,
717  &localss, &globalss);
718  TEUCHOS_TEST_FOR_EXCEPTION(globalss != nRow, std::logic_error,
719  "File-based nonzero distribution requires all diagonal "
720  << "entries to be present in the file. \n"
721  << "Number of diagonal entries in file = " << globalss << "\n"
722  << "Number of matrix rows = " << nRow << "\n");
723  }
724  else {
725  for (global_ordinal_type i = 0;
726  i < static_cast<global_ordinal_type>(nRow); i++)
727  {
728  if (dist->Mine(i,i)) {
729  if (diagset.find(i) == diagset.end()) {
730  nzindex_t idx = std::make_pair(i,i);
731  localNZ[idx] = 0;
732  }
733  }
734  }
735  }
736  }
737  // Done with diagset; free its memory
738  std::set<global_ordinal_type>().swap(diagset);
739 
740 }
741 
744 // (This format is used by the upcoming readPerProcessBinary function) //
746 //
747 //
748 // FILE FORMAT:
749 // globalNumRows globalNumCols localNumNnzs row_1 col_1 ... row_n col_n,
750 // where n = localNumNnzs
751 //
752 //
753 // ASSUMPTIONS:
754 // - The nonzeros should be sorted by their row indices within each file.
755 // - Nonzeros have global row and column indices.
756 // - The user specifies the base <filename>, where rank i reads <filename>.i.cooBin.
757 //
758 //
759 // TYPES:
760 // localNumNnzs: unsigned long long
761 // everything else: unsigned int
762 //
763 //
764 // BASE OF INDEXING: 1
765 //
766 //
768 static
769 void
770 readPerProcessBinary(
771  const std::string &filename,
772  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
773  const Teuchos::ParameterList &params,
774  size_t &nRow,
775  size_t &nCol,
776  typename Distribution<global_ordinal_type,scalar_type>::LocalNZmap_t &localNZ,
777  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > &dist,
778  unsigned int* &buffer,
779  size_t &nNz
780 )
781 {
782 
783  int me = comm->getRank();
784 
785  bool verbose = false; // Print status as reading
786  {
787  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
788  if (pe != NULL)
789  verbose = pe->getValue<bool>(&verbose);
790  }
791 
792  std::string distribution = "1D"; // Default distribution is 1D row-based
793  {
794  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
795  if (pe != NULL)
796  distribution = pe->getValue<std::string>(&distribution);
797  }
798 
799  if (verbose && me == 0)
800  std::cout << "Reading per-process binary files... " << filename << std::endl;
801 
802 
803  std::string rankFileName = filename + "." + std::to_string(me) + ".cooBin";
804  FILE *fp = NULL;
805 
806  fp = fopen(rankFileName.c_str(), "rb");
807  if (fp == NULL) {
808  std::cout << "Error: cannot open file " << filename << std::endl;
809  throw std::runtime_error("Error: non-existing input file: " + rankFileName);
810  }
811 
812  // The header in each per-process file: globalNumRows globalNumCols localNumNonzeros
813  unsigned int globalNumRows = 0, globalNumCols = 0;
814  unsigned long long localNumNonzeros = 0;
815  fread(&globalNumRows, sizeof(unsigned int), 1, fp);
816  fread(&globalNumCols, sizeof(unsigned int), 1, fp);
817  fread(&localNumNonzeros, sizeof(unsigned long long), 1, fp);
818 
819  nRow = static_cast<size_t>(globalNumRows);
820  nCol = static_cast<size_t>(globalNumCols);
821  nNz = static_cast<size_t>(localNumNonzeros);
822 
823  // Fill the simple buffer array instead of a std::map
824  // S. Acer: With large graphs, we can't afford std::map
825  buffer = new unsigned int[nNz*2];
826 
827  if(nNz > 0) {
828  size_t ret = fread(buffer, sizeof(unsigned int), 2*nNz, fp);
829  if (ret == 0) {
830  std::cout << "Unexpected end of matrix file: " << rankFileName << std::endl;
831  std::cout.flush();
832  delete [] buffer;
833  exit(-1);
834  }
835  }
836  if (fp != NULL) fclose(fp);
837 
838  // This barrier is not necessary but useful for debugging
839  comm->barrier();
840  if(verbose && me == 0)
841  std::cout << "All ranks finished reading their nonzeros from their individual files\n";
842 
843  // Create distribution based on nRow, nCol, npRow, npCol
844  dist = buildDistribution<global_ordinal_type,scalar_type>(distribution,
845  nRow, nCol, params,
846  comm);
847 }
848 
849 public:
850 
851 // This is the default interface.
852 static Teuchos::RCP<sparse_matrix_type>
853 readSparseFile(
854  const std::string &filename, // MatrixMarket file to read
855  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
856  const Teuchos::ParameterList &params
857 )
858 {
859  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > dist;
860  return readSparseFile(filename, comm, params, dist);
861 }
862 
863 // This version has the Distribution object as an output parameter.
864 // S. Acer needs the distribution object to get the chunk cuts from
865 // LowerTriangularBlock distribution.
866 static Teuchos::RCP<sparse_matrix_type>
867 readSparseFile(
868  const std::string &filename, // MatrixMarket file to read
869  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
870  const Teuchos::ParameterList &params,
871  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > &dist
872 )
873 {
874  bool useTimers = false; // should we time various parts of the reader?
875  {
876  const Teuchos::ParameterEntry *pe = params.getEntryPtr("useTimers");
877  if (pe != NULL)
878  useTimers = pe->getValue<bool>(&useTimers);
879  }
880 
881  Teuchos::RCP<Teuchos::TimeMonitor> timer = Teuchos::null;
882  if (useTimers)
883  timer = rcp(new Teuchos::TimeMonitor(
884  *Teuchos::TimeMonitor::getNewTimer("RSF parameterSetup")));
885 
886  int me = comm->getRank();
887 
888  // Check parameters to determine how to process the matrix while reading
889  // TODO: Add validators for the parameters
890 
891  bool verbose = false; // Print status as reading
892  {
893  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
894  if (pe != NULL)
895  verbose = pe->getValue<bool>(&verbose);
896  }
897 
898  bool callFillComplete = true; // should we fillComplete the new CrsMatrix?
899  {
900  const Teuchos::ParameterEntry *pe = params.getEntryPtr("callFillComplete");
901  if (pe != NULL)
902  callFillComplete = pe->getValue<bool>(&callFillComplete);
903  }
904 
905  // Don't want to use MatrixMarket's coordinate reader, because don't want
906  // entire matrix on one processor.
907  // Instead, Proc 0 reads nonzeros in chunks and broadcasts chunks to all
908  // processors.
909  // All processors insert nonzeros they own into a std::map
910 
911  // Storage for this processor's nonzeros.
912  using localNZmap_t =
913  typename Distribution<global_ordinal_type,scalar_type>::LocalNZmap_t;
914  localNZmap_t localNZ;
915 
916  bool binary = false; // should we read a binary file?
917  {
918  const Teuchos::ParameterEntry *pe = params.getEntryPtr("binary");
919  if (pe != NULL)
920  binary = pe->getValue<bool>(&binary);
921  }
922 
923  bool readPerProcess = false; // should we read a separate file per process?
924  {
925  const Teuchos::ParameterEntry *pe = params.getEntryPtr("readPerProcess");
926  if (pe != NULL)
927  readPerProcess = pe->getValue<bool>(&readPerProcess);
928  }
929 
930  if (useTimers) {
931  const char *timername = (binary?"RSF readBinary":"RSF readMatrixMarket");
932  timer = Teuchos::null;
933  timer = rcp(new Teuchos::TimeMonitor(
934  *Teuchos::TimeMonitor::getNewTimer(timername)));
935  }
936 
937  // Read nonzeros from the given file(s)
938  size_t nRow = 0, nCol = 0;
939  unsigned int *buffer; size_t nNz = 0;
940  if(binary){
941  if(readPerProcess)
942  readPerProcessBinary(filename, comm, params, nRow, nCol, localNZ, dist, buffer, nNz);
943  else
944  readBinary(filename, comm, params, nRow, nCol, localNZ, dist);
945  }
946  else
947  readMatrixMarket(filename, comm, params, nRow, nCol, localNZ, dist);
948 
949  if(readPerProcess == false){
950 
951  // Redistribute nonzeros as needed to satisfy the Distribution
952  // For most Distributions, this is a no-op
953  if (useTimers) {
954  timer = Teuchos::null;
955  timer = rcp(new Teuchos::TimeMonitor(
956  *Teuchos::TimeMonitor::getNewTimer("RSF redistribute")));
957  }
958 
959  dist->Redistribute(localNZ);
960  }
961 
962  if (useTimers) {
963  timer = Teuchos::null;
964  timer = rcp(new Teuchos::TimeMonitor(
965  *Teuchos::TimeMonitor::getNewTimer("RSF nonzerosConstruction")));
966  }
967 
968  // Now construct the matrix.
969  // Count number of entries in each row for best use of StaticProfile
970  if (verbose && me == 0)
971  std::cout << std::endl << "Constructing the matrix" << std::endl;
972 
973  Teuchos::Array<global_ordinal_type> rowIdx;
974  Teuchos::Array<size_t> nnzPerRow;
975  Teuchos::Array<global_ordinal_type> colIdx;
976  Teuchos::Array<scalar_type> val;
977  Teuchos::Array<size_t> offsets;
978 
979  if(readPerProcess) {
980  global_ordinal_type prevI = std::numeric_limits<global_ordinal_type>::max();
981  for (size_t it = 0; it < nNz; it++){
982  global_ordinal_type I = buffer[2*it]-1;
983  global_ordinal_type J = buffer[2*it+1]-1;
984  if (prevI != I) {
985  prevI = I;
986  rowIdx.push_back(I);
987  nnzPerRow.push_back(0);
988  }
989  nnzPerRow.back()++;
990  colIdx.push_back(J);
991  }
992  delete [] buffer;
993  }
994  else {
995  // Exploit fact that map has entries sorted by I, then J
996  global_ordinal_type prevI = std::numeric_limits<global_ordinal_type>::max();
997  for (auto it = localNZ.begin(); it != localNZ.end(); it++) {
998  global_ordinal_type I = it->first.first;
999  global_ordinal_type J = it->first.second;
1000  scalar_type V = it->second;
1001  if (prevI != I) {
1002  prevI = I;
1003  rowIdx.push_back(I);
1004  nnzPerRow.push_back(0);
1005  }
1006  nnzPerRow.back()++;
1007  colIdx.push_back(J);
1008  val.push_back(V);
1009  }
1010 
1011  // Done with localNZ map; free it.
1012  localNZmap_t().swap(localNZ);
1013  }
1014 
1015  // Compute prefix sum in offsets array
1016  offsets.resize(rowIdx.size() + 1);
1017  offsets[0] = 0;
1018  for (size_t row = 0; row < static_cast<size_t>(rowIdx.size()); row++)
1019  offsets[row+1] = offsets[row] + nnzPerRow[row];
1020 
1021  if (useTimers) {
1022  timer = Teuchos::null;
1023  timer = rcp(new Teuchos::TimeMonitor(
1024  *Teuchos::TimeMonitor::getNewTimer("RSF insertNonzeros")));
1025  }
1026 
1027  // Create a new RowMap with only rows having non-zero entries.
1028  size_t dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1029  Teuchos::RCP<const Tpetra::Map<> > rowMap =
1030  Teuchos::rcp(new Tpetra::Map<>(dummy, rowIdx(), 0, comm));
1031 
1032  Teuchos::RCP<sparse_matrix_type> A =
1033  rcp(new sparse_matrix_type(rowMap, nnzPerRow()));
1034 
1035  // Insert the global values into the matrix row-by-row.
1036  if (verbose && me == 0)
1037  std::cout << "Inserting global values" << std::endl;
1038 
1039  if(readPerProcess){
1040  const scalar_type ONE = Teuchos::ScalarTraits<scalar_type>::one();
1041  for (int i = 0; i < rowIdx.size(); i++) {
1042  size_t nnz = nnzPerRow[i];
1043  size_t off = offsets[i];
1044  val.assign(nnz, ONE);
1045  // ReadPerProcess routine does not read any numeric values from the file,
1046  // So we insert dummy values here.
1047  A->insertGlobalValues(rowIdx[i], colIdx(off, nnz), val());
1048  }
1049  }
1050  else {
1051  for (int i = 0; i < rowIdx.size(); i++) {
1052  size_t nnz = nnzPerRow[i];
1053  size_t off = offsets[i];
1054  A->insertGlobalValues(rowIdx[i], colIdx(off, nnz), val(off, nnz));
1055  }
1056  }
1057 
1058  // free memory that is no longer needed
1059  Teuchos::Array<size_t>().swap(nnzPerRow);
1060  Teuchos::Array<size_t>().swap(offsets);
1061  Teuchos::Array<global_ordinal_type>().swap(rowIdx);
1062  Teuchos::Array<global_ordinal_type>().swap(colIdx);
1063  Teuchos::Array<scalar_type>().swap(val);
1064 
1065  if (useTimers)
1066  timer = Teuchos::null;
1067 
1068  if (callFillComplete) {
1069 
1070  if (verbose && me == 0)
1071  std::cout << "Building vector maps" << std::endl;
1072 
1073  if (useTimers) {
1074  timer = Teuchos::null;
1075  timer = rcp(new Teuchos::TimeMonitor(
1076  *Teuchos::TimeMonitor::getNewTimer("RSF buildVectorMaps")));
1077  }
1078 
1079  // Build domain map that corresponds to distribution
1080  Teuchos::Array<global_ordinal_type> vectorSet;
1081  for (global_ordinal_type i = 0;
1082  i < static_cast<global_ordinal_type>(nCol); i++)
1083  if (dist->VecMine(i)) vectorSet.push_back(i);
1084 
1085  dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1086  Teuchos::RCP<const Tpetra::Map<> > domainMap =
1087  Teuchos::rcp(new Tpetra::Map<>(dummy, vectorSet(), 0, comm));
1088 
1089  Teuchos::Array<global_ordinal_type>().swap(vectorSet);
1090 
1091  // Build range map that corresponds to distribution
1092  for (global_ordinal_type i = 0;
1093  i < static_cast<global_ordinal_type>(nRow); i++)
1094  if (dist->VecMine(i)) vectorSet.push_back(i);
1095 
1096  dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1097  Teuchos::RCP<const Tpetra::Map<> > rangeMap =
1098  Teuchos::rcp(new Tpetra::Map<>(dummy, vectorSet(), 0, comm));
1099 
1100  Teuchos::Array<global_ordinal_type>().swap(vectorSet);
1101 
1102  // FillComplete the matrix
1103  if (useTimers) {
1104  timer = Teuchos::null;
1105  timer = rcp(new Teuchos::TimeMonitor(
1106  *Teuchos::TimeMonitor::getNewTimer("RSF fillComplete")));
1107  }
1108 
1109  if (verbose && me == 0)
1110  std::cout << "Calling fillComplete" << std::endl;
1111 
1112  A->fillComplete(domainMap, rangeMap);
1113 
1114  if (useTimers)
1115  timer = Teuchos::null;
1116 
1117  if (verbose) {
1118  std::cout << "\nRank " << A->getComm()->getRank()
1119  << " nRows " << A->getNodeNumRows()
1120  << " minRow " << A->getRowMap()->getMinGlobalIndex()
1121  << " maxRow " << A->getRowMap()->getMaxGlobalIndex()
1122  << "\nRank " << A->getComm()->getRank()
1123  << " nCols " << A->getNodeNumCols()
1124  << " minCol " << A->getColMap()->getMinGlobalIndex()
1125  << " maxCol " << A->getColMap()->getMaxGlobalIndex()
1126  << "\nRank " << A->getComm()->getRank()
1127  << " nDomain " << A->getDomainMap()->getNodeNumElements()
1128  << " minDomain " << A->getDomainMap()->getMinGlobalIndex()
1129  << " maxDomain " << A->getDomainMap()->getMaxGlobalIndex()
1130  << "\nRank " << A->getComm()->getRank()
1131  << " nRange " << A->getRangeMap()->getNodeNumElements()
1132  << " minRange " << A->getRangeMap()->getMinGlobalIndex()
1133  << " maxRange " << A->getRangeMap()->getMaxGlobalIndex()
1134  << "\nRank " << A->getComm()->getRank()
1135  << " nEntries " << A->getNodeNumEntries()
1136  << std::endl;
1137  }
1138  }
1139 
1140  return A;
1141 }
1142 
1143 #endif
1144 
A parallel distribution of indices over processes.