Tpetra parallel linear algebra  Version of the Day
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
44 
45 #ifndef TPETRA_MAP_DEF_HPP
46 #define TPETRA_MAP_DEF_HPP
47 
48 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
50 #include "Tpetra_Details_FixedHashTable.hpp"
53 #include "Tpetra_Core.hpp"
54 #include "Tpetra_Util.hpp"
55 #include "Teuchos_as.hpp"
56 #include "Teuchos_TypeNameTraits.hpp"
57 #include "Teuchos_CommHelpers.hpp"
58 #include "Tpetra_Details_mpiIsInitialized.hpp"
59 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
61 #include <memory>
62 #include <sstream>
63 #include <stdexcept>
64 #include <typeinfo>
65 
66 namespace { // (anonymous)
67 
68  template<class ExecutionSpace>
69  void
70  checkMapInputArray (const char ctorName[],
71  const void* indexList,
72  const size_t indexListSize,
73  const ExecutionSpace& execSpace,
74  const Teuchos::Comm<int>* const comm)
75  {
77 
78  const bool debug = Behavior::debug("Map");
79  if (debug) {
80  using Teuchos::outArg;
81  using Teuchos::REDUCE_MIN;
82  using Teuchos::reduceAll;
83  using std::endl;
84 
85  const int myRank = comm == nullptr ? 0 : comm->getRank ();
86  const bool verbose = Behavior::verbose("Map");
87  std::ostringstream lclErrStrm;
88  int lclSuccess = 1;
89 
90  if (indexListSize != 0 && indexList == nullptr) {
91  lclSuccess = 0;
92  if (verbose) {
93  lclErrStrm << "Proc " << myRank << ": indexList is null, "
94  "but indexListSize=" << indexListSize << " != 0." << endl;
95  }
96  }
97  int gblSuccess = 0; // output argument
98  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
99  if (gblSuccess != 1) {
100  std::ostringstream gblErrStrm;
101  gblErrStrm << "Tpetra::Map constructor " << ctorName <<
102  " detected a problem with the input array "
103  "(raw array, Teuchos::ArrayView, or Kokkos::View) "
104  "of global indices." << endl;
105  if (verbose) {
106  using ::Tpetra::Details::gathervPrint;
107  gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
108  }
109  TEUCHOS_TEST_FOR_EXCEPTION
110  (true, std::invalid_argument, gblErrStrm.str ());
111  }
112  }
113  }
114 } // namespace (anonymous)
115 
116 namespace Tpetra {
117 
118  template <class LocalOrdinal, class GlobalOrdinal, class Node>
120  Map () :
121  comm_ (new Teuchos::SerialComm<int> ()),
122  indexBase_ (0),
123  numGlobalElements_ (0),
124  numLocalElements_ (0),
125  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
126  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
127  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
128  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
129  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
130  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
131  uniform_ (false), // trivially
132  contiguous_ (false),
133  distributed_ (false), // no communicator yet
134  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
135  {
137  }
138 
139 
140  template <class LocalOrdinal, class GlobalOrdinal, class Node>
142  Map (const global_size_t numGlobalElements,
143  const global_ordinal_type indexBase,
144  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
145  const LocalGlobal lOrG) :
146  comm_ (comm),
147  uniform_ (true),
148  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
149  {
150  using Teuchos::as;
151  using Teuchos::broadcast;
152  using Teuchos::outArg;
153  using Teuchos::reduceAll;
154  using Teuchos::REDUCE_MIN;
155  using Teuchos::REDUCE_MAX;
156  using Teuchos::typeName;
157  using std::endl;
158  using GO = global_ordinal_type;
159  using GST = global_size_t;
160  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
161  const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
162  const char exPfx[] =
163  "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
164 
165  const bool debug = Details::Behavior::debug("Map");
166  const bool verbose = Details::Behavior::verbose("Map");
167  std::unique_ptr<std::string> prefix;
168  if (verbose) {
169  prefix = Details::createPrefix(
170  comm_.getRawPtr(), "Map", funcName);
171  std::ostringstream os;
172  os << *prefix << "Start" << endl;
173  std::cerr << os.str();
174  }
176 
177  // In debug mode only, check whether numGlobalElements and
178  // indexBase are the same over all processes in the communicator.
179  if (debug) {
180  GST proc0NumGlobalElements = numGlobalElements;
181  broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
182  GST minNumGlobalElements = numGlobalElements;
183  GST maxNumGlobalElements = numGlobalElements;
184  reduceAll(*comm, REDUCE_MIN, numGlobalElements,
185  outArg(minNumGlobalElements));
186  reduceAll(*comm, REDUCE_MAX, numGlobalElements,
187  outArg(maxNumGlobalElements));
188  TEUCHOS_TEST_FOR_EXCEPTION
189  (minNumGlobalElements != maxNumGlobalElements ||
190  numGlobalElements != minNumGlobalElements,
191  std::invalid_argument, exPfx << "All processes must "
192  "provide the same number of global elements. Process 0 set "
193  "numGlobalElements="<< proc0NumGlobalElements << ". The "
194  "calling process " << comm->getRank() << " set "
195  "numGlobalElements=" << numGlobalElements << ". The min "
196  "and max values over all processes are "
197  << minNumGlobalElements << " resp. " << maxNumGlobalElements
198  << ".");
199 
200  GO proc0IndexBase = indexBase;
201  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
202  GO minIndexBase = indexBase;
203  GO maxIndexBase = indexBase;
204  reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
205  reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
206  TEUCHOS_TEST_FOR_EXCEPTION
207  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
208  std::invalid_argument, exPfx << "All processes must "
209  "provide the same indexBase argument. Process 0 set "
210  "indexBase=" << proc0IndexBase << ". The calling process "
211  << comm->getRank() << " set indexBase=" << indexBase
212  << ". The min and max values over all processes are "
213  << minIndexBase << " resp. " << maxIndexBase << ".");
214  }
215 
216  // Distribute the elements across the processes in the given
217  // communicator so that global IDs (GIDs) are
218  //
219  // - Nonoverlapping (only one process owns each GID)
220  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
221  // adjacent GIDs differ by more than one)
222  // - As evenly distributed as possible (the numbers of GIDs on two
223  // different processes do not differ by more than one)
224 
225  // All processes have the same numGlobalElements, but we still
226  // need to check that it is valid. numGlobalElements must be
227  // positive and not the "invalid" value (GSTI).
228  //
229  // This comparison looks funny, but it avoids compiler warnings
230  // for comparing unsigned integers (numGlobalElements_in is a
231  // GST, which is unsigned) while still working if we
232  // later decide to make GST signed.
233  TEUCHOS_TEST_FOR_EXCEPTION(
234  (numGlobalElements < 1 && numGlobalElements != 0),
235  std::invalid_argument, exPfx << "numGlobalElements (= "
236  << numGlobalElements << ") must be nonnegative.");
237 
238  TEUCHOS_TEST_FOR_EXCEPTION
239  (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
240  "You provided numGlobalElements = Teuchos::OrdinalTraits<"
241  "Tpetra::global_size_t>::invalid(). This version of the "
242  "constructor requires a valid value of numGlobalElements. "
243  "You probably mistook this constructor for the \"contiguous "
244  "nonuniform\" constructor, which can compute the global "
245  "number of elements for you if you set numGlobalElements to "
246  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
247 
248  size_t numLocalElements = 0; // will set below
249  if (lOrG == GloballyDistributed) {
250  // Compute numLocalElements:
251  //
252  // If numGlobalElements == numProcs * B + remainder,
253  // then Proc r gets B+1 elements if r < remainder,
254  // and B elements if r >= remainder.
255  //
256  // This strategy is valid for any value of numGlobalElements and
257  // numProcs, including the following border cases:
258  // - numProcs == 1
259  // - numLocalElements < numProcs
260  //
261  // In the former case, remainder == 0 && numGlobalElements ==
262  // numLocalElements. In the latter case, remainder ==
263  // numGlobalElements && numLocalElements is either 0 or 1.
264  const GST numProcs = static_cast<GST> (comm_->getSize ());
265  const GST myRank = static_cast<GST> (comm_->getRank ());
266  const GST quotient = numGlobalElements / numProcs;
267  const GST remainder = numGlobalElements - quotient * numProcs;
268 
269  GO startIndex;
270  if (myRank < remainder) {
271  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
272  // myRank was originally an int, so it should never overflow
273  // reasonable GO types.
274  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
275  } else {
276  numLocalElements = as<size_t> (quotient);
277  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
278  as<GO> (remainder);
279  }
280 
281  minMyGID_ = indexBase + startIndex;
282  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
283  minAllGID_ = indexBase;
284  maxAllGID_ = indexBase + numGlobalElements - 1;
285  distributed_ = (numProcs > 1);
286  }
287  else { // lOrG == LocallyReplicated
288  numLocalElements = as<size_t> (numGlobalElements);
289  minMyGID_ = indexBase;
290  maxMyGID_ = indexBase + numGlobalElements - 1;
291  distributed_ = false;
292  }
293 
294  minAllGID_ = indexBase;
295  maxAllGID_ = indexBase + numGlobalElements - 1;
296  indexBase_ = indexBase;
297  numGlobalElements_ = numGlobalElements;
298  numLocalElements_ = numLocalElements;
299  firstContiguousGID_ = minMyGID_;
300  lastContiguousGID_ = maxMyGID_;
301  contiguous_ = true;
302 
303  // Create the Directory on demand in getRemoteIndexList().
304  //setupDirectory ();
305 
306  if (verbose) {
307  std::ostringstream os;
308  os << *prefix << "Done" << endl;
309  std::cerr << os.str();
310  }
311  }
312 
313 
314  template <class LocalOrdinal, class GlobalOrdinal, class Node>
316  Map (const global_size_t numGlobalElements,
317  const size_t numLocalElements,
318  const global_ordinal_type indexBase,
319  const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
320  comm_ (comm),
321  uniform_ (false),
322  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
323  {
324  using Teuchos::as;
325  using Teuchos::broadcast;
326  using Teuchos::outArg;
327  using Teuchos::reduceAll;
328  using Teuchos::REDUCE_MIN;
329  using Teuchos::REDUCE_MAX;
330  using Teuchos::REDUCE_SUM;
331  using Teuchos::scan;
332  using std::endl;
333  using GO = global_ordinal_type;
334  using GST = global_size_t;
335  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
336  const char funcName[] =
337  "Map(gblNumInds,lclNumInds,indexBase,comm)";
338  const char exPfx[] =
339  "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
340  const char suffix[] =
341  ". Please report this bug to the Tpetra developers.";
342 
343  const bool debug = Details::Behavior::debug("Map");
344  const bool verbose = Details::Behavior::verbose("Map");
345  std::unique_ptr<std::string> prefix;
346  if (verbose) {
347  prefix = Details::createPrefix(
348  comm_.getRawPtr(), "Map", funcName);
349  std::ostringstream os;
350  os << *prefix << "Start" << endl;
351  std::cerr << os.str();
352  }
354 
355  // Global sum of numLocalElements over all processes.
356  // Keep this for later debug checks.
357  GST debugGlobalSum {};
358  if (debug) {
359  debugGlobalSum = initialNonuniformDebugCheck(exPfx,
360  numGlobalElements, numLocalElements, indexBase, comm);
361  }
362 
363  // Distribute the elements across the nodes so that they are
364  // - non-overlapping
365  // - contiguous
366 
367  // This differs from the first Map constructor (that only takes a
368  // global number of elements) in that the user has specified the
369  // number of local elements, so that the elements are not
370  // (necessarily) evenly distributed over the processes.
371 
372  // Compute my local offset. This is an inclusive scan, so to get
373  // the final offset, we subtract off the input.
374  GO scanResult = 0;
375  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
376  const GO myOffset = scanResult - numLocalElements;
377 
378  if (numGlobalElements != GSTI) {
379  numGlobalElements_ = numGlobalElements; // Use the user's value.
380  }
381  else {
382  // Inclusive scan means that the last process has the final sum.
383  // Rather than doing a reduceAll to get the sum of
384  // numLocalElements, we can just have the last process broadcast
385  // its result. That saves us a round of log(numProcs) messages.
386  const int numProcs = comm->getSize ();
387  GST globalSum = scanResult;
388  if (numProcs > 1) {
389  broadcast (*comm, numProcs - 1, outArg (globalSum));
390  }
391  numGlobalElements_ = globalSum;
392 
393  if (debug) {
394  // No need for an all-reduce here; both come from collectives.
395  TEUCHOS_TEST_FOR_EXCEPTION
396  (globalSum != debugGlobalSum, std::logic_error, exPfx <<
397  "globalSum = " << globalSum << " != debugGlobalSum = " <<
398  debugGlobalSum << suffix);
399  }
400  }
401  numLocalElements_ = numLocalElements;
402  indexBase_ = indexBase;
403  minAllGID_ = (numGlobalElements_ == 0) ?
404  std::numeric_limits<GO>::max () :
405  indexBase;
406  maxAllGID_ = (numGlobalElements_ == 0) ?
407  std::numeric_limits<GO>::lowest () :
408  indexBase + GO(numGlobalElements_) - GO(1);
409  minMyGID_ = (numLocalElements_ == 0) ?
410  std::numeric_limits<GO>::max () :
411  indexBase + GO(myOffset);
412  maxMyGID_ = (numLocalElements_ == 0) ?
413  std::numeric_limits<GO>::lowest () :
414  indexBase + myOffset + GO(numLocalElements) - GO(1);
415  firstContiguousGID_ = minMyGID_;
416  lastContiguousGID_ = maxMyGID_;
417  contiguous_ = true;
418  distributed_ = checkIsDist ();
419 
420  // Create the Directory on demand in getRemoteIndexList().
421  //setupDirectory ();
422 
423  if (verbose) {
424  std::ostringstream os;
425  os << *prefix << "Done" << endl;
426  std::cerr << os.str();
427  }
428  }
429 
430  template <class LocalOrdinal, class GlobalOrdinal, class Node>
432  Map<LocalOrdinal,GlobalOrdinal,Node>::
433  initialNonuniformDebugCheck(
434  const char errorMessagePrefix[],
435  const global_size_t numGlobalElements,
436  const size_t numLocalElements,
437  const global_ordinal_type indexBase,
438  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
439  {
440  const bool debug = Details::Behavior::debug("Map");
441  if (! debug) {
442  return global_size_t(0);
443  }
444 
445  using Teuchos::broadcast;
446  using Teuchos::outArg;
447  using Teuchos::ptr;
448  using Teuchos::REDUCE_MAX;
449  using Teuchos::REDUCE_MIN;
450  using Teuchos::REDUCE_SUM;
451  using Teuchos::reduceAll;
452  using GO = global_ordinal_type;
453  using GST = global_size_t;
454  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
455 
456  // The user has specified the distribution of indices over the
457  // processes. The distribution is not necessarily contiguous or
458  // equally shared over the processes.
459  //
460  // We assume that the number of local elements can be stored in a
461  // size_t. The instance member numLocalElements_ is a size_t, so
462  // this variable and that should have the same type.
463 
464  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
465  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
466  outArg (debugGlobalSum));
467  // In debug mode only, check whether numGlobalElements and
468  // indexBase are the same over all processes in the communicator.
469  {
470  GST proc0NumGlobalElements = numGlobalElements;
471  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
472  GST minNumGlobalElements = numGlobalElements;
473  GST maxNumGlobalElements = numGlobalElements;
474  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
475  outArg (minNumGlobalElements));
476  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
477  outArg (maxNumGlobalElements));
478  TEUCHOS_TEST_FOR_EXCEPTION
479  (minNumGlobalElements != maxNumGlobalElements ||
480  numGlobalElements != minNumGlobalElements,
481  std::invalid_argument, errorMessagePrefix << "All processes "
482  "must provide the same number of global elements, even if "
483  "that argument is "
484  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
485  "(which signals that the Map should compute the global "
486  "number of elements). Process 0 set numGlobalElements"
487  "=" << proc0NumGlobalElements << ". The calling process "
488  << comm->getRank() << " set numGlobalElements=" <<
489  numGlobalElements << ". The min and max values over all "
490  "processes are " << minNumGlobalElements << " resp. " <<
491  maxNumGlobalElements << ".");
492 
493  GO proc0IndexBase = indexBase;
494  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
495  GO minIndexBase = indexBase;
496  GO maxIndexBase = indexBase;
497  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
498  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
499  TEUCHOS_TEST_FOR_EXCEPTION
500  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
501  std::invalid_argument, errorMessagePrefix <<
502  "All processes must provide the same indexBase argument. "
503  "Process 0 set indexBase = " << proc0IndexBase << ". The "
504  "calling process " << comm->getRank() << " set indexBase="
505  << indexBase << ". The min and max values over all "
506  "processes are " << minIndexBase << " resp. " << maxIndexBase
507  << ".");
508 
509  // Make sure that the sum of numLocalElements over all processes
510  // equals numGlobalElements.
511  TEUCHOS_TEST_FOR_EXCEPTION
512  (numGlobalElements != GSTI &&
513  debugGlobalSum != numGlobalElements, std::invalid_argument,
514  errorMessagePrefix << "The sum of each process' number of "
515  "indices over all processes, " << debugGlobalSum << ", != "
516  << "numGlobalElements=" << numGlobalElements << ". If you "
517  "would like this constructor to compute numGlobalElements "
518  "for you, you may set numGlobalElements="
519  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
520  "on input. Please note that this is NOT necessarily -1.");
521  }
522  return debugGlobalSum;
523  }
524 
525  template <class LocalOrdinal, class GlobalOrdinal, class Node>
526  void
527  Map<LocalOrdinal,GlobalOrdinal,Node>::
528  initWithNonownedHostIndexList(
529  const char errorMessagePrefix[],
530  const global_size_t numGlobalElements,
531  const Kokkos::View<const global_ordinal_type*,
532  Kokkos::LayoutLeft,
533  Kokkos::HostSpace,
534  Kokkos::MemoryUnmanaged>& entryList_host,
535  const global_ordinal_type indexBase,
536  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
537  {
538  using Kokkos::LayoutLeft;
539  using Kokkos::subview;
540  using Kokkos::View;
541  using Kokkos::view_alloc;
542  using Kokkos::WithoutInitializing;
543  using Teuchos::as;
544  using Teuchos::broadcast;
545  using Teuchos::outArg;
546  using Teuchos::ptr;
547  using Teuchos::REDUCE_MAX;
548  using Teuchos::REDUCE_MIN;
549  using Teuchos::REDUCE_SUM;
550  using Teuchos::reduceAll;
551  using LO = local_ordinal_type;
552  using GO = global_ordinal_type;
553  using GST = global_size_t;
554  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
555 
556  // Make sure that Kokkos has been initialized (Github Issue #513).
557  TEUCHOS_TEST_FOR_EXCEPTION
558  (! Kokkos::is_initialized (), std::runtime_error,
559  errorMessagePrefix << "The Kokkos execution space "
560  << Teuchos::TypeNameTraits<execution_space>::name()
561  << " has not been initialized. "
562  "Please initialize it before creating a Map.")
563 
564  // The user has specified the distribution of indices over the
565  // processes, via the input array of global indices on each
566  // process. The distribution is not necessarily contiguous or
567  // equally shared over the processes.
568 
569  // The length of the input array on this process is the number of
570  // local indices to associate with this process, even though the
571  // input array contains global indices. We assume that the number
572  // of local indices on a process can be stored in a size_t;
573  // numLocalElements_ is a size_t, so this variable and that should
574  // have the same type.
575  const size_t numLocalElements(entryList_host.size());
576 
577  initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
578  numLocalElements, indexBase, comm);
579 
580  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
581  // reduction is redundant, since the directory Map will have to do
582  // the same thing. Thus, we could do the scan and broadcast for
583  // the directory Map here, and give the computed offsets to the
584  // directory Map's constructor. However, a reduction costs less
585  // than a scan and broadcast, so this still saves time if users of
586  // this Map don't ever need the Directory (i.e., if they never
587  // call getRemoteIndexList on this Map).
588  if (numGlobalElements != GSTI) {
589  numGlobalElements_ = numGlobalElements; // Use the user's value.
590  }
591  else { // The user wants us to compute the sum.
592  reduceAll(*comm, REDUCE_SUM,
593  static_cast<GST>(numLocalElements),
594  outArg(numGlobalElements_));
595  }
596 
597  // mfh 20 Feb 2013: We've never quite done the right thing for
598  // duplicate GIDs here. Duplicate GIDs have always been counted
599  // distinctly in numLocalElements_, and thus should get a
600  // different LID. However, we've always used std::map or a hash
601  // table for the GID -> LID lookup table, so distinct GIDs always
602  // map to the same LID. Furthermore, the order of the input GID
603  // list matters, so it's not desirable to sort for determining
604  // uniqueness.
605  //
606  // I've chosen for now to write this code as if the input GID list
607  // contains no duplicates. If this is not desired, we could use
608  // the lookup table itself to determine uniqueness: If we haven't
609  // seen the GID before, it gets a new LID and it's added to the
610  // LID -> GID and GID -> LID tables. If we have seen the GID
611  // before, it doesn't get added to either table. I would
612  // implement this, but it would cost more to do the double lookups
613  // in the table (one to check, and one to insert).
614  //
615  // More importantly, since we build the GID -> LID table in (a
616  // thread-) parallel (way), the order in which duplicate GIDs may
617  // get inserted is not defined. This would make the assignment of
618  // LID to GID nondeterministic.
619 
620  numLocalElements_ = numLocalElements;
621  indexBase_ = indexBase;
622 
623  minMyGID_ = indexBase_;
624  maxMyGID_ = indexBase_;
625 
626  // NOTE (mfh 27 May 2015): While finding the initial contiguous
627  // GID range requires looking at all the GIDs in the range,
628  // dismissing an interval of GIDs only requires looking at the
629  // first and last GIDs. Thus, we could do binary search backwards
630  // from the end in order to catch the common case of a contiguous
631  // interval followed by noncontiguous entries. On the other hand,
632  // we could just expose this case explicitly as yet another Map
633  // constructor, and avoid the trouble of detecting it.
634  if (numLocalElements_ > 0) {
635  // Find contiguous GID range, with the restriction that the
636  // beginning of the range starts with the first entry. While
637  // doing so, fill in the LID -> GID table.
638  typename decltype (lgMap_)::non_const_type lgMap
639  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
640  auto lgMap_host =
641  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
642 
643  // The input array entryList_host is already on host, so we
644  // don't need to take a host view of it.
645  // auto entryList_host =
646  // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
647  // Kokkos::deep_copy (entryList_host, entryList);
648 
649  firstContiguousGID_ = entryList_host[0];
650  lastContiguousGID_ = firstContiguousGID_+1;
651 
652  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
653  // anyway, so we have to look at them all. The logical way to
654  // find the first noncontiguous entry would thus be to "reduce,"
655  // where the local reduction result is whether entryList[i] + 1
656  // == entryList[i+1].
657 
658  lgMap_host[0] = firstContiguousGID_;
659  size_t i = 1;
660  for ( ; i < numLocalElements_; ++i) {
661  const GO curGid = entryList_host[i];
662  const LO curLid = as<LO> (i);
663 
664  if (lastContiguousGID_ != curGid) break;
665 
666  // Add the entry to the LID->GID table only after we know that
667  // the current GID is in the initial contiguous sequence, so
668  // that we don't repeat adding it in the first iteration of
669  // the loop below over the remaining noncontiguous GIDs.
670  lgMap_host[curLid] = curGid;
671  ++lastContiguousGID_;
672  }
673  --lastContiguousGID_;
674 
675  // [firstContiguousGID_, lastContigousGID_] is the initial
676  // sequence of contiguous GIDs. We can start the min and max
677  // GID using this range.
678  minMyGID_ = firstContiguousGID_;
679  maxMyGID_ = lastContiguousGID_;
680 
681  // Compute the GID -> LID lookup table, _not_ including the
682  // initial sequence of contiguous GIDs.
683  {
684  const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
685  auto nonContigGids_host = subview (entryList_host, ncRange);
686  TEUCHOS_TEST_FOR_EXCEPTION
687  (static_cast<size_t> (nonContigGids_host.extent (0)) !=
688  static_cast<size_t> (entryList_host.extent (0) - i),
689  std::logic_error, "Tpetra::Map noncontiguous constructor: "
690  "nonContigGids_host.extent(0) = "
691  << nonContigGids_host.extent (0)
692  << " != entryList_host.extent(0) - i = "
693  << (entryList_host.extent (0) - i) << " = "
694  << entryList_host.extent (0) << " - " << i
695  << ". Please report this bug to the Tpetra developers.");
696 
697  // FixedHashTable's constructor expects an owned device View,
698  // so we must deep-copy the subview of the input indices.
699  View<GO*, LayoutLeft, device_type>
700  nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
701  nonContigGids_host.size ());
702  Kokkos::deep_copy (nonContigGids, nonContigGids_host);
703 
704  glMap_ = global_to_local_table_type(nonContigGids,
705  firstContiguousGID_,
706  lastContiguousGID_,
707  static_cast<LO> (i));
708  // Make host version - when memory spaces match these just do trivial assignment
709  glMapHost_ = global_to_local_table_host_type(glMap_);
710  }
711 
712  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
713  // table above, we have to look at all the (noncontiguous) input
714  // indices anyway. Thus, why not have the constructor compute
715  // and return the min and max?
716 
717  for ( ; i < numLocalElements_; ++i) {
718  const GO curGid = entryList_host[i];
719  const LO curLid = static_cast<LO> (i);
720  lgMap_host[curLid] = curGid; // LID -> GID table
721 
722  // While iterating through entryList, we compute its
723  // (process-local) min and max elements.
724  if (curGid < minMyGID_) {
725  minMyGID_ = curGid;
726  }
727  if (curGid > maxMyGID_) {
728  maxMyGID_ = curGid;
729  }
730  }
731 
732  // We filled lgMap on host above; now sync back to device.
733  Kokkos::deep_copy (lgMap, lgMap_host);
734 
735  // "Commit" the local-to-global lookup table we filled in above.
736  lgMap_ = lgMap;
737  // We've already created this, so use it.
738  lgMapHost_ = lgMap_host;
739  }
740  else {
741  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
742  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
743  // This insures tests for GIDs in the range
744  // [firstContiguousGID_, lastContiguousGID_] fail for processes
745  // with no local elements.
746  firstContiguousGID_ = indexBase_+1;
747  lastContiguousGID_ = indexBase_;
748  // glMap_ was default constructed, so it's already empty.
749  }
750 
751  // Compute the min and max of all processes' GIDs. If
752  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
753  // are both indexBase_. This is wrong, but fixing it would
754  // require either a fancy sparse all-reduce, or a custom reduction
755  // operator that ignores invalid values ("invalid" means
756  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
757  //
758  // Also, while we're at it, use the same all-reduce to figure out
759  // if the Map is distributed. "Distributed" means that there is
760  // at least one process with a number of local elements less than
761  // the number of global elements.
762  //
763  // We're computing the min and max of all processes' GIDs using a
764  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
765  // and y are signed). (This lets us combine the min and max into
766  // a single all-reduce.) If each process sets localDist=1 if its
767  // number of local elements is strictly less than the number of
768  // global elements, and localDist=0 otherwise, then a MAX
769  // all-reduce on localDist tells us if the Map is distributed (1
770  // if yes, 0 if no). Thus, we can append localDist onto the end
771  // of the data and get the global result from the all-reduce.
772  if (std::numeric_limits<GO>::is_signed) {
773  // Does my process NOT own all the elements?
774  const GO localDist =
775  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
776 
777  GO minMaxInput[3];
778  minMaxInput[0] = -minMyGID_;
779  minMaxInput[1] = maxMyGID_;
780  minMaxInput[2] = localDist;
781 
782  GO minMaxOutput[3];
783  minMaxOutput[0] = 0;
784  minMaxOutput[1] = 0;
785  minMaxOutput[2] = 0;
786  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
787  minAllGID_ = -minMaxOutput[0];
788  maxAllGID_ = minMaxOutput[1];
789  const GO globalDist = minMaxOutput[2];
790  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
791  }
792  else { // unsigned; use two reductions
793  // This is always correct, no matter the signedness of GO.
794  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
795  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
796  distributed_ = checkIsDist ();
797  }
798 
799  contiguous_ = false; // "Contiguous" is conservative.
800 
801  TEUCHOS_TEST_FOR_EXCEPTION(
802  minAllGID_ < indexBase_,
803  std::invalid_argument,
804  "Tpetra::Map constructor (noncontiguous): "
805  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
806  "less than the given indexBase = " << indexBase_ << ".");
807 
808  // Create the Directory on demand in getRemoteIndexList().
809  //setupDirectory ();
810  }
811 
812  template <class LocalOrdinal, class GlobalOrdinal, class Node>
814  Map (const global_size_t numGlobalElements,
815  const GlobalOrdinal indexList[],
816  const LocalOrdinal indexListSize,
817  const GlobalOrdinal indexBase,
818  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
819  comm_ (comm),
820  uniform_ (false),
821  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
822  {
823  using std::endl;
824  const char funcName[] =
825  "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
826 
827  const bool verbose = Details::Behavior::verbose("Map");
828  std::unique_ptr<std::string> prefix;
829  if (verbose) {
830  prefix = Details::createPrefix(
831  comm_.getRawPtr(), "Map", funcName);
832  std::ostringstream os;
833  os << *prefix << "Start" << endl;
834  std::cerr << os.str();
835  }
837  checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
838  indexList, static_cast<size_t> (indexListSize),
839  Kokkos::DefaultHostExecutionSpace (),
840  comm.getRawPtr ());
841  // Not quite sure if I trust all code to behave correctly if the
842  // pointer is nonnull but the array length is nonzero, so I'll
843  // make sure the raw pointer is null if the length is zero.
844  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
845  Kokkos::View<const GlobalOrdinal*,
846  Kokkos::LayoutLeft,
847  Kokkos::HostSpace,
848  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
849  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
850  indexBase, comm);
851  if (verbose) {
852  std::ostringstream os;
853  os << *prefix << "Done" << endl;
854  std::cerr << os.str();
855  }
856  }
857 
858  template <class LocalOrdinal, class GlobalOrdinal, class Node>
860  Map (const global_size_t numGlobalElements,
861  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
862  const GlobalOrdinal indexBase,
863  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
864  comm_ (comm),
865  uniform_ (false),
866  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
867  {
868  using std::endl;
869  const char funcName[] =
870  "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
871 
872  const bool verbose = Details::Behavior::verbose("Map");
873  std::unique_ptr<std::string> prefix;
874  if (verbose) {
875  prefix = Details::createPrefix(
876  comm_.getRawPtr(), "Map", funcName);
877  std::ostringstream os;
878  os << *prefix << "Start" << endl;
879  std::cerr << os.str();
880  }
882  const size_t numLclInds = static_cast<size_t> (entryList.size ());
883  checkMapInputArray ("(GST, ArrayView, GO, comm)",
884  entryList.getRawPtr (), numLclInds,
885  Kokkos::DefaultHostExecutionSpace (),
886  comm.getRawPtr ());
887  // Not quite sure if I trust both ArrayView and View to behave
888  // correctly if the pointer is nonnull but the array length is
889  // nonzero, so I'll make sure it's null if the length is zero.
890  const GlobalOrdinal* const indsRaw =
891  numLclInds == 0 ? NULL : entryList.getRawPtr ();
892  Kokkos::View<const GlobalOrdinal*,
893  Kokkos::LayoutLeft,
894  Kokkos::HostSpace,
895  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
896  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
897  indexBase, comm);
898  if (verbose) {
899  std::ostringstream os;
900  os << *prefix << "Done" << endl;
901  std::cerr << os.str();
902  }
903  }
904 
905  template <class LocalOrdinal, class GlobalOrdinal, class Node>
907  Map (const global_size_t numGlobalElements,
908  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
909  const GlobalOrdinal indexBase,
910  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
911  comm_ (comm),
912  uniform_ (false),
913  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
914  {
915  using Kokkos::LayoutLeft;
916  using Kokkos::subview;
917  using Kokkos::View;
918  using Kokkos::view_alloc;
919  using Kokkos::WithoutInitializing;
920  using Teuchos::arcp;
921  using Teuchos::ArrayView;
922  using Teuchos::as;
923  using Teuchos::broadcast;
924  using Teuchos::outArg;
925  using Teuchos::ptr;
926  using Teuchos::REDUCE_MAX;
927  using Teuchos::REDUCE_MIN;
928  using Teuchos::REDUCE_SUM;
929  using Teuchos::reduceAll;
930  using Teuchos::typeName;
931  using std::endl;
932  using LO = local_ordinal_type;
933  using GO = global_ordinal_type;
934  using GST = global_size_t;
935  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
936  const char funcName[] =
937  "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
938 
939  const bool verbose = Details::Behavior::verbose("Map");
940  std::unique_ptr<std::string> prefix;
941  if (verbose) {
942  prefix = Details::createPrefix(
943  comm_.getRawPtr(), "Map", funcName);
944  std::ostringstream os;
945  os << *prefix << "Start" << endl;
946  std::cerr << os.str();
947  }
949  checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
950  entryList.data (),
951  static_cast<size_t> (entryList.extent (0)),
952  execution_space (), comm.getRawPtr ());
953 
954  // The user has specified the distribution of indices over the
955  // processes, via the input array of global indices on each
956  // process. The distribution is not necessarily contiguous or
957  // equally shared over the processes.
958 
959  // The length of the input array on this process is the number of
960  // local indices to associate with this process, even though the
961  // input array contains global indices. We assume that the number
962  // of local indices on a process can be stored in a size_t;
963  // numLocalElements_ is a size_t, so this variable and that should
964  // have the same type.
965  const size_t numLocalElements(entryList.size());
966 
967  initialNonuniformDebugCheck(funcName, numGlobalElements,
968  numLocalElements, indexBase, comm);
969 
970  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
971  // reduction is redundant, since the directory Map will have to do
972  // the same thing. Thus, we could do the scan and broadcast for
973  // the directory Map here, and give the computed offsets to the
974  // directory Map's constructor. However, a reduction costs less
975  // than a scan and broadcast, so this still saves time if users of
976  // this Map don't ever need the Directory (i.e., if they never
977  // call getRemoteIndexList on this Map).
978  if (numGlobalElements != GSTI) {
979  numGlobalElements_ = numGlobalElements; // Use the user's value.
980  }
981  else { // The user wants us to compute the sum.
982  reduceAll(*comm, REDUCE_SUM,
983  static_cast<GST>(numLocalElements),
984  outArg(numGlobalElements_));
985  }
986 
987  // mfh 20 Feb 2013: We've never quite done the right thing for
988  // duplicate GIDs here. Duplicate GIDs have always been counted
989  // distinctly in numLocalElements_, and thus should get a
990  // different LID. However, we've always used std::map or a hash
991  // table for the GID -> LID lookup table, so distinct GIDs always
992  // map to the same LID. Furthermore, the order of the input GID
993  // list matters, so it's not desirable to sort for determining
994  // uniqueness.
995  //
996  // I've chosen for now to write this code as if the input GID list
997  // contains no duplicates. If this is not desired, we could use
998  // the lookup table itself to determine uniqueness: If we haven't
999  // seen the GID before, it gets a new LID and it's added to the
1000  // LID -> GID and GID -> LID tables. If we have seen the GID
1001  // before, it doesn't get added to either table. I would
1002  // implement this, but it would cost more to do the double lookups
1003  // in the table (one to check, and one to insert).
1004  //
1005  // More importantly, since we build the GID -> LID table in (a
1006  // thread-) parallel (way), the order in which duplicate GIDs may
1007  // get inserted is not defined. This would make the assignment of
1008  // LID to GID nondeterministic.
1009 
1010  numLocalElements_ = numLocalElements;
1011  indexBase_ = indexBase;
1012 
1013  minMyGID_ = indexBase_;
1014  maxMyGID_ = indexBase_;
1015 
1016  // NOTE (mfh 27 May 2015): While finding the initial contiguous
1017  // GID range requires looking at all the GIDs in the range,
1018  // dismissing an interval of GIDs only requires looking at the
1019  // first and last GIDs. Thus, we could do binary search backwards
1020  // from the end in order to catch the common case of a contiguous
1021  // interval followed by noncontiguous entries. On the other hand,
1022  // we could just expose this case explicitly as yet another Map
1023  // constructor, and avoid the trouble of detecting it.
1024  if (numLocalElements_ > 0) {
1025  // Find contiguous GID range, with the restriction that the
1026  // beginning of the range starts with the first entry. While
1027  // doing so, fill in the LID -> GID table.
1028  typename decltype (lgMap_)::non_const_type lgMap
1029  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1030  auto lgMap_host =
1031  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1032 
1033  using array_layout =
1034  typename View<const GO*, device_type>::array_layout;
1035  View<GO*, array_layout, Kokkos::HostSpace> entryList_host
1036  (view_alloc ("entryList_host", WithoutInitializing),
1037  entryList.extent(0));
1038  Kokkos::deep_copy (entryList_host, entryList);
1040  firstContiguousGID_ = entryList_host[0];
1041  lastContiguousGID_ = firstContiguousGID_+1;
1042 
1043  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
1044  // anyway, so we have to look at them all. The logical way to
1045  // find the first noncontiguous entry would thus be to "reduce,"
1046  // where the local reduction result is whether entryList[i] + 1
1047  // == entryList[i+1].
1048 
1049  lgMap_host[0] = firstContiguousGID_;
1050  size_t i = 1;
1051  for ( ; i < numLocalElements_; ++i) {
1052  const GO curGid = entryList_host[i];
1053  const LO curLid = as<LO> (i);
1054 
1055  if (lastContiguousGID_ != curGid) break;
1056 
1057  // Add the entry to the LID->GID table only after we know that
1058  // the current GID is in the initial contiguous sequence, so
1059  // that we don't repeat adding it in the first iteration of
1060  // the loop below over the remaining noncontiguous GIDs.
1061  lgMap_host[curLid] = curGid;
1062  ++lastContiguousGID_;
1063  }
1064  --lastContiguousGID_;
1065 
1066  // [firstContiguousGID_, lastContigousGID_] is the initial
1067  // sequence of contiguous GIDs. We can start the min and max
1068  // GID using this range.
1069  minMyGID_ = firstContiguousGID_;
1070  maxMyGID_ = lastContiguousGID_;
1071 
1072  // Compute the GID -> LID lookup table, _not_ including the
1073  // initial sequence of contiguous GIDs.
1074  {
1075  const std::pair<size_t, size_t> ncRange (i, entryList.extent (0));
1076  auto nonContigGids = subview (entryList, ncRange);
1077  TEUCHOS_TEST_FOR_EXCEPTION
1078  (static_cast<size_t> (nonContigGids.extent (0)) !=
1079  static_cast<size_t> (entryList.extent (0) - i),
1080  std::logic_error, "Tpetra::Map noncontiguous constructor: "
1081  "nonContigGids.extent(0) = "
1082  << nonContigGids.extent (0)
1083  << " != entryList.extent(0) - i = "
1084  << (entryList.extent (0) - i) << " = "
1085  << entryList.extent (0) << " - " << i
1086  << ". Please report this bug to the Tpetra developers.");
1087 
1088  glMap_ = global_to_local_table_type(nonContigGids,
1089  firstContiguousGID_,
1090  lastContiguousGID_,
1091  static_cast<LO> (i));
1092  // Make host version - when memory spaces match these just do trivial assignment
1093  glMapHost_ = global_to_local_table_host_type(glMap_);
1094  }
1095 
1096  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
1097  // table above, we have to look at all the (noncontiguous) input
1098  // indices anyway. Thus, why not have the constructor compute
1099  // and return the min and max?
1100 
1101  for ( ; i < numLocalElements_; ++i) {
1102  const GO curGid = entryList_host[i];
1103  const LO curLid = static_cast<LO> (i);
1104  lgMap_host[curLid] = curGid; // LID -> GID table
1105 
1106  // While iterating through entryList, we compute its
1107  // (process-local) min and max elements.
1108  if (curGid < minMyGID_) {
1109  minMyGID_ = curGid;
1110  }
1111  if (curGid > maxMyGID_) {
1112  maxMyGID_ = curGid;
1113  }
1114  }
1115 
1116  // We filled lgMap on host above; now sync back to device.
1117  Kokkos::deep_copy (lgMap, lgMap_host);
1118 
1119  // "Commit" the local-to-global lookup table we filled in above.
1120  lgMap_ = lgMap;
1121  // We've already created this, so use it.
1122  lgMapHost_ = lgMap_host;
1123  }
1124  else {
1125  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1126  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1127  // This insures tests for GIDs in the range
1128  // [firstContiguousGID_, lastContiguousGID_] fail for processes
1129  // with no local elements.
1130  firstContiguousGID_ = indexBase_+1;
1131  lastContiguousGID_ = indexBase_;
1132  // glMap_ was default constructed, so it's already empty.
1133  }
1134 
1135  // Compute the min and max of all processes' GIDs. If
1136  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1137  // are both indexBase_. This is wrong, but fixing it would
1138  // require either a fancy sparse all-reduce, or a custom reduction
1139  // operator that ignores invalid values ("invalid" means
1140  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1141  //
1142  // Also, while we're at it, use the same all-reduce to figure out
1143  // if the Map is distributed. "Distributed" means that there is
1144  // at least one process with a number of local elements less than
1145  // the number of global elements.
1146  //
1147  // We're computing the min and max of all processes' GIDs using a
1148  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1149  // and y are signed). (This lets us combine the min and max into
1150  // a single all-reduce.) If each process sets localDist=1 if its
1151  // number of local elements is strictly less than the number of
1152  // global elements, and localDist=0 otherwise, then a MAX
1153  // all-reduce on localDist tells us if the Map is distributed (1
1154  // if yes, 0 if no). Thus, we can append localDist onto the end
1155  // of the data and get the global result from the all-reduce.
1156  if (std::numeric_limits<GO>::is_signed) {
1157  // Does my process NOT own all the elements?
1158  const GO localDist =
1159  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1160 
1161  GO minMaxInput[3];
1162  minMaxInput[0] = -minMyGID_;
1163  minMaxInput[1] = maxMyGID_;
1164  minMaxInput[2] = localDist;
1165 
1166  GO minMaxOutput[3];
1167  minMaxOutput[0] = 0;
1168  minMaxOutput[1] = 0;
1169  minMaxOutput[2] = 0;
1170  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1171  minAllGID_ = -minMaxOutput[0];
1172  maxAllGID_ = minMaxOutput[1];
1173  const GO globalDist = minMaxOutput[2];
1174  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1175  }
1176  else { // unsigned; use two reductions
1177  // This is always correct, no matter the signedness of GO.
1178  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1179  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1180  distributed_ = checkIsDist ();
1181  }
1182 
1183  contiguous_ = false; // "Contiguous" is conservative.
1184 
1185  TEUCHOS_TEST_FOR_EXCEPTION(
1186  minAllGID_ < indexBase_,
1187  std::invalid_argument,
1188  "Tpetra::Map constructor (noncontiguous): "
1189  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1190  "less than the given indexBase = " << indexBase_ << ".");
1191 
1192  // Create the Directory on demand in getRemoteIndexList().
1193  //setupDirectory ();
1194 
1195  if (verbose) {
1196  std::ostringstream os;
1197  os << *prefix << "Done" << endl;
1198  std::cerr << os.str();
1199  }
1200  }
1201 
1202 
1203  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1205  {
1206  if (! Kokkos::is_initialized ()) {
1207  std::ostringstream os;
1208  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1209  "Kokkos::finalize() has been called. This is user error! There are "
1210  "two likely causes: " << std::endl <<
1211  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1212  << std::endl <<
1213  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1214  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1215  "or Tpetra::finalize()." << std::endl << std::endl <<
1216  "Don't do either of these! Please refer to GitHib Issue #2372."
1217  << std::endl;
1218  ::Tpetra::Details::printOnce (std::cerr, os.str (),
1219  this->getComm ().getRawPtr ());
1220  }
1221  else {
1222  using ::Tpetra::Details::mpiIsInitialized;
1223  using ::Tpetra::Details::mpiIsFinalized;
1224  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1225 
1226  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1227  if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1228  mpiIsInitialized () && mpiIsFinalized ()) {
1229  // Tpetra itself does not require MPI, even if building with
1230  // MPI. It is legal to create Tpetra objects that do not use
1231  // MPI, even in an MPI program. However, calling Tpetra stuff
1232  // after MPI_Finalize() has been called is a bad idea, since
1233  // some Tpetra defaults may use MPI if available.
1234  std::ostringstream os;
1235  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1236  "MPI_Finalize() has been called. This is user error! There are "
1237  "two likely causes: " << std::endl <<
1238  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1239  << std::endl <<
1240  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1241  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1242  "Tpetra::finalize()." << std::endl << std::endl <<
1243  "Don't do either of these! Please refer to GitHib Issue #2372."
1244  << std::endl;
1245  ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1246  }
1247  }
1248  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1249  // because Tpetra does not yet require Tpetra::initialize /
1250  // Tpetra::finalize.
1251  }
1252 
1253  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1254  bool
1256  {
1257  TEUCHOS_TEST_FOR_EXCEPTION(
1258  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1259  "getComm() returns null. Please report this bug to the Tpetra "
1260  "developers.");
1261 
1262  // This is a collective operation, if it hasn't been called before.
1263  setupDirectory ();
1264  return directory_->isOneToOne (*this);
1265  }
1266 
1267  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1268  LocalOrdinal
1270  getLocalElement (GlobalOrdinal globalIndex) const
1271  {
1272  if (isContiguous ()) {
1273  if (globalIndex < getMinGlobalIndex () ||
1274  globalIndex > getMaxGlobalIndex ()) {
1275  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1276  }
1277  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1278  }
1279  else if (globalIndex >= firstContiguousGID_ &&
1280  globalIndex <= lastContiguousGID_) {
1281  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1282  }
1283  else {
1284  // If the given global index is not in the table, this returns
1285  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1286  // glMapHost_ is Host and does not assume UVM
1287  return glMapHost_.get (globalIndex);
1288  }
1289  }
1290 
1291  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1292  GlobalOrdinal
1294  getGlobalElement (LocalOrdinal localIndex) const
1295  {
1296  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1297  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1298  }
1299  if (isContiguous ()) {
1300  return getMinGlobalIndex () + localIndex;
1301  }
1302  else {
1303  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1304  // involvement. As a result, it is thread safe.
1305  //
1306  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1307  return lgMapHost_[localIndex];
1308  }
1309  }
1310 
1311  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1312  bool
1314  isNodeLocalElement (LocalOrdinal localIndex) const
1315  {
1316  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1317  return false;
1318  } else {
1319  return true;
1320  }
1321  }
1322 
1323  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1324  bool
1326  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1327  return this->getLocalElement (globalIndex) !=
1328  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1329  }
1330 
1331  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1333  return uniform_;
1334  }
1335 
1336  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1338  return contiguous_;
1339  }
1340 
1341 
1342  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1345  getLocalMap () const
1346  {
1347  return local_map_type (glMap_, lgMap_, getIndexBase (),
1348  getMinGlobalIndex (), getMaxGlobalIndex (),
1349  firstContiguousGID_, lastContiguousGID_,
1350  getNodeNumElements (), isContiguous ());
1351  }
1352 
1353  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1354  bool
1357  {
1358  using Teuchos::outArg;
1359  using Teuchos::REDUCE_MIN;
1360  using Teuchos::reduceAll;
1361  //
1362  // Tests that avoid the Boolean all-reduce below by using
1363  // globally consistent quantities.
1364  //
1365  if (this == &map) {
1366  // Pointer equality on one process always implies pointer
1367  // equality on all processes, since Map is immutable.
1368  return true;
1369  }
1370  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1371  // The two communicators have different numbers of processes.
1372  // It's not correct to call isCompatible() in that case. This
1373  // may result in the all-reduce hanging below.
1374  return false;
1375  }
1376  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1377  // Two Maps are definitely NOT compatible if they have different
1378  // global numbers of indices.
1379  return false;
1380  }
1381  else if (isContiguous () && isUniform () &&
1382  map.isContiguous () && map.isUniform ()) {
1383  // Contiguous uniform Maps with the same number of processes in
1384  // their communicators, and with the same global numbers of
1385  // indices, are always compatible.
1386  return true;
1387  }
1388  else if (! isContiguous () && ! map.isContiguous () &&
1389  lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1390  lgMap_.data () == map.lgMap_.data ()) {
1391  // Noncontiguous Maps whose global index lists are nonempty and
1392  // have the same pointer must be the same (and therefore
1393  // contiguous).
1394  //
1395  // Nonempty is important. For example, consider a communicator
1396  // with two processes, and two Maps that share this
1397  // communicator, with zero global indices on the first process,
1398  // and different nonzero numbers of global indices on the second
1399  // process. In that case, on the first process, the pointers
1400  // would both be NULL.
1401  return true;
1402  }
1403 
1404  TEUCHOS_TEST_FOR_EXCEPTION(
1405  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1406  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1407  "checked that this condition is true above, but it's false here. "
1408  "Please report this bug to the Tpetra developers.");
1409 
1410  // Do both Maps have the same number of indices on each process?
1411  const int locallyCompat =
1412  (getNodeNumElements () == map.getNodeNumElements ()) ? 1 : 0;
1413 
1414  int globallyCompat = 0;
1415  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1416  return (globallyCompat == 1);
1417  }
1418 
1419  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1420  bool
1423  {
1424  using Teuchos::ArrayView;
1425  using GO = global_ordinal_type;
1426  using size_type = typename ArrayView<const GO>::size_type;
1427 
1428  // If both Maps are contiguous, we can compare their GID ranges
1429  // easily by looking at the min and max GID on this process.
1430  // Otherwise, we'll compare their GID lists. If only one Map is
1431  // contiguous, then we only have to call getNodeElementList() on
1432  // the noncontiguous Map. (It's best to avoid calling it on a
1433  // contiguous Map, since it results in unnecessary storage that
1434  // persists for the lifetime of the Map.)
1435 
1436  if (this == &map) {
1437  // Pointer equality on one process always implies pointer
1438  // equality on all processes, since Map is immutable.
1439  return true;
1440  }
1441  else if (getNodeNumElements () != map.getNodeNumElements ()) {
1442  return false;
1443  }
1444  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1445  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1446  return false;
1447  }
1448  else {
1449  if (isContiguous ()) {
1450  if (map.isContiguous ()) {
1451  return true; // min and max match, so the ranges match.
1452  }
1453  else { // *this is contiguous, but map is not contiguous
1454  TEUCHOS_TEST_FOR_EXCEPTION(
1455  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1456  "Tpetra::Map::locallySameAs: BUG");
1457  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1458  const GO minLhsGid = this->getMinGlobalIndex ();
1459  const size_type numRhsElts = rhsElts.size ();
1460  for (size_type k = 0; k < numRhsElts; ++k) {
1461  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1462  if (curLhsGid != rhsElts[k]) {
1463  return false; // stop on first mismatch
1464  }
1465  }
1466  return true;
1467  }
1468  }
1469  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1470  TEUCHOS_TEST_FOR_EXCEPTION(
1471  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1472  "Tpetra::Map::locallySameAs: BUG");
1473  ArrayView<const GO> lhsElts = this->getNodeElementList ();
1474  const GO minRhsGid = map.getMinGlobalIndex ();
1475  const size_type numLhsElts = lhsElts.size ();
1476  for (size_type k = 0; k < numLhsElts; ++k) {
1477  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1478  if (curRhsGid != lhsElts[k]) {
1479  return false; // stop on first mismatch
1480  }
1481  }
1482  return true;
1483  }
1484  else if (this->lgMap_.data () == map.lgMap_.data ()) {
1485  // Pointers to LID->GID "map" (actually just an array) are the
1486  // same, and the number of GIDs are the same.
1487  return this->getNodeNumElements () == map.getNodeNumElements ();
1488  }
1489  else { // we actually have to compare the GIDs
1490  if (this->getNodeNumElements () != map.getNodeNumElements ()) {
1491  return false; // We already checked above, but check just in case
1492  }
1493  else {
1494  ArrayView<const GO> lhsElts = getNodeElementList ();
1495  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1496 
1497  // std::equal requires that the latter range is as large as
1498  // the former. We know the ranges have equal length, because
1499  // they have the same number of local entries.
1500  return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1501  }
1502  }
1503  }
1504  }
1505 
1506  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1507  bool
1510  {
1511  if (this == &map)
1512  return true;
1513 
1514  // We are going to check if lmap1 is fitted into lmap2
1515  auto lmap1 = map.getLocalMap();
1516  auto lmap2 = this->getLocalMap();
1517 
1518  auto numLocalElements1 = lmap1.getNodeNumElements();
1519  auto numLocalElements2 = lmap2.getNodeNumElements();
1520 
1521  if (numLocalElements1 > numLocalElements2) {
1522  // There are more indices in the first map on this process than in second map.
1523  return false;
1524  }
1525 
1526  if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1527  // When both Maps are contiguous, just check the interval inclusion.
1528  return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1529  (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1530  }
1531 
1532  if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1533  lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1534  // The second map does not include the first map bounds, and thus some of
1535  // the first map global indices are not in the second map.
1536  return false;
1537  }
1538 
1539  using LO = local_ordinal_type;
1540  using range_type =
1541  Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1542 
1543  // Check all elements.
1544  LO numDiff = 0;
1545  Kokkos::parallel_reduce(
1546  "isLocallyFitted",
1547  range_type(0, numLocalElements1),
1548  KOKKOS_LAMBDA (const LO i, LO& diff) {
1549  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1550  }, numDiff);
1551 
1552  return (numDiff == 0);
1553  }
1554 
1555  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1556  bool
1559  {
1560  using Teuchos::outArg;
1561  using Teuchos::REDUCE_MIN;
1562  using Teuchos::reduceAll;
1563  //
1564  // Tests that avoid the Boolean all-reduce below by using
1565  // globally consistent quantities.
1566  //
1567  if (this == &map) {
1568  // Pointer equality on one process always implies pointer
1569  // equality on all processes, since Map is immutable.
1570  return true;
1571  }
1572  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1573  // The two communicators have different numbers of processes.
1574  // It's not correct to call isSameAs() in that case. This
1575  // may result in the all-reduce hanging below.
1576  return false;
1577  }
1578  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1579  // Two Maps are definitely NOT the same if they have different
1580  // global numbers of indices.
1581  return false;
1582  }
1583  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1584  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1585  getIndexBase () != map.getIndexBase ()) {
1586  // If the global min or max global index doesn't match, or if
1587  // the index base doesn't match, then the Maps aren't the same.
1588  return false;
1589  }
1590  else if (isDistributed () != map.isDistributed ()) {
1591  // One Map is distributed and the other is not, which means that
1592  // the Maps aren't the same.
1593  return false;
1594  }
1595  else if (isContiguous () && isUniform () &&
1596  map.isContiguous () && map.isUniform ()) {
1597  // Contiguous uniform Maps with the same number of processes in
1598  // their communicators, with the same global numbers of indices,
1599  // and with matching index bases and ranges, must be the same.
1600  return true;
1601  }
1602 
1603  // The two communicators must have the same number of processes,
1604  // with process ranks occurring in the same order. This uses
1605  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1606  // "Operations that access communicators are local and their
1607  // execution does not require interprocess communication."
1608  // However, just to be sure, I'll put this call after the above
1609  // tests that don't communicate.
1610  if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1611  return false;
1612  }
1613 
1614  // If we get this far, we need to check local properties and then
1615  // communicate local sameness across all processes.
1616  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1617 
1618  // Return true if and only if all processes report local sameness.
1619  int isSame_gbl = 0;
1620  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1621  return isSame_gbl == 1;
1622  }
1623 
1624  namespace { // (anonymous)
1625  template <class LO, class GO, class DT>
1626  class FillLgMap {
1627  public:
1628  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1629  const GO startGid) :
1630  lgMap_ (lgMap), startGid_ (startGid)
1631  {
1632  Kokkos::RangePolicy<LO, typename DT::execution_space>
1633  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1634  Kokkos::parallel_for (range, *this);
1635  }
1636 
1637  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1638  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1639  }
1640 
1641  private:
1642  const Kokkos::View<GO*, DT> lgMap_;
1643  const GO startGid_;
1644  };
1645 
1646  } // namespace (anonymous)
1647 
1648 
1649  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1650  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1652  {
1653  using std::endl;
1654  using LO = local_ordinal_type;
1655  using GO = global_ordinal_type;
1656  using const_lg_view_type = decltype(lgMap_);
1657  using lg_view_type = typename const_lg_view_type::non_const_type;
1658  const bool debug = Details::Behavior::debug("Map");
1659  const bool verbose = Details::Behavior::verbose("Map");
1660 
1661  std::unique_ptr<std::string> prefix;
1662  if (verbose) {
1663  prefix = Details::createPrefix(
1664  comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1665  std::ostringstream os;
1666  os << *prefix << "Start" << endl;
1667  std::cerr << os.str();
1668  }
1669 
1670  // If the local-to-global mapping doesn't exist yet, and if we
1671  // have local entries, then create and fill the local-to-global
1672  // mapping.
1673  const bool needToCreateLocalToGlobalMapping =
1674  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1675 
1676  if (needToCreateLocalToGlobalMapping) {
1677  if (verbose) {
1678  std::ostringstream os;
1679  os << *prefix << "Need to create lgMap" << endl;
1680  std::cerr << os.str();
1681  }
1682  if (debug) {
1683  // The local-to-global mapping should have been set up already
1684  // for a noncontiguous map.
1685  TEUCHOS_TEST_FOR_EXCEPTION
1686  (! isContiguous(), std::logic_error,
1687  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1688  "mapping (lgMap_) should have been set up already for a "
1689  "noncontiguous Map. Please report this bug to the Tpetra "
1690  "developers.");
1691  }
1692  const LO numElts = static_cast<LO> (getNodeNumElements ());
1693 
1694  using Kokkos::view_alloc;
1695  using Kokkos::WithoutInitializing;
1696  lg_view_type lgMap ("lgMap", numElts);
1697  if (verbose) {
1698  std::ostringstream os;
1699  os << *prefix << "Fill lgMap" << endl;
1700  std::cerr << os.str();
1701  }
1702  FillLgMap<LO, GO, no_uvm_device_type> fillIt (lgMap, minMyGID_);
1703 
1704  if (verbose) {
1705  std::ostringstream os;
1706  os << *prefix << "Copy lgMap to lgMapHost" << endl;
1707  std::cerr << os.str();
1708  }
1709 
1710  auto lgMapHost =
1711  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1712  Kokkos::deep_copy (lgMapHost, lgMap);
1713 
1714  // "Commit" the local-to-global lookup table we filled in above.
1715  lgMap_ = lgMap;
1716  lgMapHost_ = lgMapHost;
1717  }
1718 
1719  if (verbose) {
1720  std::ostringstream os;
1721  os << *prefix << "Done" << endl;
1722  std::cerr << os.str();
1723  }
1724  return lgMapHost_;
1725  }
1726 
1727  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1728  Teuchos::ArrayView<const GlobalOrdinal>
1730  {
1731  using GO = global_ordinal_type;
1732 
1733  // If the local-to-global mapping doesn't exist yet, and if we
1734  // have local entries, then create and fill the local-to-global
1735  // mapping.
1736  (void) this->getMyGlobalIndices ();
1737 
1738  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1739  const GO* lgMapHostRawPtr = lgMapHost_.data ();
1740  // The third argument forces ArrayView not to try to track memory
1741  // in a debug build. We have to use it because the memory does
1742  // not belong to a Teuchos memory management class.
1743  return Teuchos::ArrayView<const GO>(
1744  lgMapHostRawPtr,
1745  lgMapHost_.extent (0),
1746  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1747  }
1748 
1749  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1751  return distributed_;
1752  }
1753 
1754  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1756  using Teuchos::TypeNameTraits;
1757  std::ostringstream os;
1758 
1759  os << "Tpetra::Map: {"
1760  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1761  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1762  << ", NodeType: " << TypeNameTraits<Node>::name ();
1763  if (this->getObjectLabel () != "") {
1764  os << ", Label: \"" << this->getObjectLabel () << "\"";
1765  }
1766  os << ", Global number of entries: " << getGlobalNumElements ()
1767  << ", Number of processes: " << getComm ()->getSize ()
1768  << ", Uniform: " << (isUniform () ? "true" : "false")
1769  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1770  << ", Distributed: " << (isDistributed () ? "true" : "false")
1771  << "}";
1772  return os.str ();
1773  }
1774 
1779  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1780  std::string
1782  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1783  {
1784  using LO = local_ordinal_type;
1785  using std::endl;
1786 
1787  // This preserves current behavior of Map.
1788  if (vl < Teuchos::VERB_HIGH) {
1789  return std::string ();
1790  }
1791  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1792  Teuchos::RCP<Teuchos::FancyOStream> outp =
1793  Teuchos::getFancyOStream (outStringP);
1794  Teuchos::FancyOStream& out = *outp;
1795 
1796  auto comm = this->getComm ();
1797  const int myRank = comm->getRank ();
1798  const int numProcs = comm->getSize ();
1799  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1800  Teuchos::OSTab tab1 (out);
1801 
1802  const LO numEnt = static_cast<LO> (this->getNodeNumElements ());
1803  out << "My number of entries: " << numEnt << endl
1804  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1805  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1806 
1807  if (vl == Teuchos::VERB_EXTREME) {
1808  out << "My global indices: [";
1809  const LO minLclInd = this->getMinLocalIndex ();
1810  for (LO k = 0; k < numEnt; ++k) {
1811  out << minLclInd + this->getGlobalElement (k);
1812  if (k + 1 < numEnt) {
1813  out << ", ";
1814  }
1815  }
1816  out << "]" << endl;
1817  }
1818 
1819  out.flush (); // make sure the ostringstream got everything
1820  return outStringP->str ();
1821  }
1822 
1823  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1824  void
1826  describe (Teuchos::FancyOStream &out,
1827  const Teuchos::EVerbosityLevel verbLevel) const
1828  {
1829  using Teuchos::TypeNameTraits;
1830  using Teuchos::VERB_DEFAULT;
1831  using Teuchos::VERB_NONE;
1832  using Teuchos::VERB_LOW;
1833  using Teuchos::VERB_HIGH;
1834  using std::endl;
1835  using LO = local_ordinal_type;
1836  using GO = global_ordinal_type;
1837  const Teuchos::EVerbosityLevel vl =
1838  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1839 
1840  if (vl == VERB_NONE) {
1841  return; // don't print anything
1842  }
1843  // If this Map's Comm is null, then the Map does not participate
1844  // in collective operations with the other processes. In that
1845  // case, it is not even legal to call this method. The reasonable
1846  // thing to do in that case is nothing.
1847  auto comm = this->getComm ();
1848  if (comm.is_null ()) {
1849  return;
1850  }
1851  const int myRank = comm->getRank ();
1852  const int numProcs = comm->getSize ();
1853 
1854  // Only Process 0 should touch the output stream, but this method
1855  // in general may need to do communication. Thus, we may need to
1856  // preserve the current tab level across multiple "if (myRank ==
1857  // 0) { ... }" inner scopes. This is why we sometimes create
1858  // OSTab instances by pointer, instead of by value. We only need
1859  // to create them by pointer if the tab level must persist through
1860  // multiple inner scopes.
1861  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1862 
1863  if (myRank == 0) {
1864  // At every verbosity level but VERB_NONE, Process 0 prints.
1865  // By convention, describe() always begins with a tab before
1866  // printing.
1867  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1868  out << "\"Tpetra::Map\":" << endl;
1869  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1870  {
1871  out << "Template parameters:" << endl;
1872  Teuchos::OSTab tab2 (out);
1873  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1874  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1875  << "Node: " << TypeNameTraits<Node>::name () << endl;
1876  }
1877  const std::string label = this->getObjectLabel ();
1878  if (label != "") {
1879  out << "Label: \"" << label << "\"" << endl;
1880  }
1881  out << "Global number of entries: " << getGlobalNumElements () << endl
1882  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1883  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1884  << "Index base: " << getIndexBase () << endl
1885  << "Number of processes: " << numProcs << endl
1886  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1887  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1888  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1889  }
1890 
1891  // This is collective over the Map's communicator.
1892  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1893  const std::string lclStr = this->localDescribeToString (vl);
1894  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1895  }
1896  }
1897 
1898  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1899  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1901  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1902  {
1903  using Teuchos::RCP;
1904  using Teuchos::rcp;
1905  using GST = global_size_t;
1906  using LO = local_ordinal_type;
1907  using GO = global_ordinal_type;
1908  using map_type = Map<LO, GO, Node>;
1909 
1910  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1911  // the Map by calling its ordinary public constructor, using the
1912  // original Map's data. This only involves O(1) all-reduces over
1913  // the new communicator, which in the common case only includes a
1914  // small number of processes.
1915 
1916  // Create the Map to return.
1917  if (newComm.is_null () || newComm->getSize () < 1) {
1918  return Teuchos::null; // my process does not participate in the new Map
1919  }
1920  else if (newComm->getSize () == 1) {
1921  // The case where the new communicator has only one process is
1922  // easy. We don't have to communicate to get all the
1923  // information we need. Use the default comm to create the new
1924  // Map, then fill in all the fields directly.
1925  RCP<map_type> newMap (new map_type ());
1926 
1927  newMap->comm_ = newComm;
1928  // mfh 07 Oct 2016: Preserve original behavior, even though the
1929  // original index base may no longer be the globally min global
1930  // index. See #616 for why this doesn't matter so much anymore.
1931  newMap->indexBase_ = this->indexBase_;
1932  newMap->numGlobalElements_ = this->numLocalElements_;
1933  newMap->numLocalElements_ = this->numLocalElements_;
1934  newMap->minMyGID_ = this->minMyGID_;
1935  newMap->maxMyGID_ = this->maxMyGID_;
1936  newMap->minAllGID_ = this->minMyGID_;
1937  newMap->maxAllGID_ = this->maxMyGID_;
1938  newMap->firstContiguousGID_ = this->firstContiguousGID_;
1939  newMap->lastContiguousGID_ = this->lastContiguousGID_;
1940  // Since the new communicator has only one process, neither
1941  // uniformity nor contiguity have changed.
1942  newMap->uniform_ = this->uniform_;
1943  newMap->contiguous_ = this->contiguous_;
1944  // The new communicator only has one process, so the new Map is
1945  // not distributed.
1946  newMap->distributed_ = false;
1947  newMap->lgMap_ = this->lgMap_;
1948  newMap->lgMapHost_ = this->lgMapHost_;
1949  newMap->glMap_ = this->glMap_;
1950  newMap->glMapHost_ = this->glMapHost_;
1951  // It's OK not to initialize the new Map's Directory.
1952  // This is initialized lazily, on first call to getRemoteIndexList.
1953 
1954  return newMap;
1955  }
1956  else { // newComm->getSize() != 1
1957  // Even if the original Map is contiguous, the new Map might not
1958  // be, especially if the excluded processes have ranks != 0 or
1959  // newComm->getSize()-1. The common case for this method is to
1960  // exclude many (possibly even all but one) processes, so it
1961  // likely doesn't pay to do the global communication (over the
1962  // original communicator) to figure out whether we can optimize
1963  // the result Map. Thus, we just set up the result Map as
1964  // noncontiguous.
1965  //
1966  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1967  // the global-to-local table, etc. Optimize this code path to
1968  // avoid unnecessary local work.
1969 
1970  // Make Map (re)compute the global number of elements.
1971  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
1972  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1973  // to use the noncontiguous Map constructor, since the new Map
1974  // might not be contiguous. Even if the old Map was contiguous,
1975  // some process in the "middle" might have been excluded. If we
1976  // want to avoid local work, we either have to do the setup by
1977  // hand, or write a new Map constructor.
1978 #if 1
1979  // The disabled code here throws the following exception in
1980  // Map's replaceCommWithSubset test:
1981  //
1982  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ())
1983  // 10:
1984  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1985  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1986 
1987  auto lgMap = this->getMyGlobalIndices ();
1988  using size_type =
1989  typename std::decay<decltype (lgMap.extent (0)) >::type;
1990  const size_type lclNumInds =
1991  static_cast<size_type> (this->getNodeNumElements ());
1992  using Teuchos::TypeNameTraits;
1993  TEUCHOS_TEST_FOR_EXCEPTION
1994  (lgMap.extent (0) != lclNumInds, std::logic_error,
1995  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
1996  "has length " << lgMap.extent (0) << " (of type " <<
1997  TypeNameTraits<size_type>::name () << ") != this->getNodeNumElements()"
1998  " = " << this->getNodeNumElements () << ". The latter, upon being "
1999  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2000  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2001  "developers.");
2002 #else
2003  Teuchos::ArrayView<const GO> lgMap = this->getNodeElementList ();
2004 #endif // 1
2005 
2006  const GO indexBase = this->getIndexBase ();
2007  // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2008  auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2009  return rcp (new map_type (RECOMPUTE, lgMap_device, indexBase, newComm));
2010  }
2011  }
2012 
2013  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2014  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2016  removeEmptyProcesses () const
2017  {
2018  using Teuchos::Comm;
2019  using Teuchos::null;
2020  using Teuchos::outArg;
2021  using Teuchos::RCP;
2022  using Teuchos::rcp;
2023  using Teuchos::REDUCE_MIN;
2024  using Teuchos::reduceAll;
2025 
2026  // Create the new communicator. split() returns a valid
2027  // communicator on all processes. On processes where color == 0,
2028  // ignore the result. Passing key == 0 tells MPI to order the
2029  // processes in the new communicator by their rank in the old
2030  // communicator.
2031  const int color = (numLocalElements_ == 0) ? 0 : 1;
2032  // MPI_Comm_split must be called collectively over the original
2033  // communicator. We can't just call it on processes with color
2034  // one, even though we will ignore its result on processes with
2035  // color zero.
2036  RCP<const Comm<int> > newComm = comm_->split (color, 0);
2037  if (color == 0) {
2038  newComm = null;
2039  }
2040 
2041  // Create the Map to return.
2042  if (newComm.is_null ()) {
2043  return null; // my process does not participate in the new Map
2044  } else {
2045  RCP<Map> map = rcp (new Map ());
2046 
2047  map->comm_ = newComm;
2048  map->indexBase_ = indexBase_;
2049  map->numGlobalElements_ = numGlobalElements_;
2050  map->numLocalElements_ = numLocalElements_;
2051  map->minMyGID_ = minMyGID_;
2052  map->maxMyGID_ = maxMyGID_;
2053  map->minAllGID_ = minAllGID_;
2054  map->maxAllGID_ = maxAllGID_;
2055  map->firstContiguousGID_= firstContiguousGID_;
2056  map->lastContiguousGID_ = lastContiguousGID_;
2057 
2058  // Uniformity and contiguity have not changed. The directory
2059  // has changed, but we've taken care of that above.
2060  map->uniform_ = uniform_;
2061  map->contiguous_ = contiguous_;
2062 
2063  // If the original Map was NOT distributed, then the new Map
2064  // cannot be distributed.
2065  //
2066  // If the number of processes in the new communicator is 1, then
2067  // the new Map is not distributed.
2068  //
2069  // Otherwise, we have to check the new Map using an all-reduce
2070  // (over the new communicator). For example, the original Map
2071  // may have had some processes with zero elements, and all other
2072  // processes with the same number of elements as in the whole
2073  // Map. That Map is technically distributed, because of the
2074  // processes with zero elements. Removing those processes would
2075  // make the new Map locally replicated.
2076  if (! distributed_ || newComm->getSize () == 1) {
2077  map->distributed_ = false;
2078  } else {
2079  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2080  int allProcsOwnAllGids = 0;
2081  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2082  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2083  }
2084 
2085  map->lgMap_ = lgMap_;
2086  map->lgMapHost_ = lgMapHost_;
2087  map->glMap_ = glMap_;
2088  map->glMapHost_ = glMapHost_;
2089 
2090  // Map's default constructor creates an uninitialized Directory.
2091  // The Directory will be initialized on demand in
2092  // getRemoteIndexList().
2093  //
2094  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2095  // directory more efficiently than just recreating it. If
2096  // directory recreation proves a bottleneck, we can always
2097  // revisit this. On the other hand, Directory creation is only
2098  // collective over the new, presumably much smaller
2099  // communicator, so it may not be worth the effort to optimize.
2100 
2101  return map;
2102  }
2103  }
2104 
2105  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2106  void
2108  {
2109  TEUCHOS_TEST_FOR_EXCEPTION(
2110  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2111  "The Directory is null. "
2112  "Please report this bug to the Tpetra developers.");
2113 
2114  // Only create the Directory if it hasn't been created yet.
2115  // This is a collective operation.
2116  if (! directory_->initialized ()) {
2117  directory_->initialize (*this);
2118  }
2119  }
2120 
2121  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2122  LookupStatus
2124  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2125  const Teuchos::ArrayView<int>& PIDs,
2126  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2127  {
2128  using Tpetra::Details::OrdinalTraits;
2130  using std::endl;
2131  using size_type = Teuchos::ArrayView<int>::size_type;
2132 
2133  const bool verbose = Details::Behavior::verbose("Map");
2134  const size_t maxNumToPrint = verbose ?
2136  std::unique_ptr<std::string> prefix;
2137  if (verbose) {
2138  prefix = Details::createPrefix(comm_.getRawPtr(),
2139  "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2140  std::ostringstream os;
2141  os << *prefix << "Start: ";
2142  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2143  os << endl;
2144  std::cerr << os.str();
2145  }
2146 
2147  // Empty Maps (i.e., containing no indices on any processes in the
2148  // Map's communicator) are perfectly valid. In that case, if the
2149  // input GID list is nonempty, we fill the output arrays with
2150  // invalid values, and return IDNotPresent to notify the caller.
2151  // It's perfectly valid to give getRemoteIndexList GIDs that the
2152  // Map doesn't own. SubmapImport test 2 needs this functionality.
2153  if (getGlobalNumElements () == 0) {
2154  if (GIDs.size () == 0) {
2155  if (verbose) {
2156  std::ostringstream os;
2157  os << *prefix << "Done; both Map & input are empty" << endl;
2158  std::cerr << os.str();
2159  }
2160  return AllIDsPresent; // trivially
2161  }
2162  else {
2163  if (verbose) {
2164  std::ostringstream os;
2165  os << *prefix << "Done: Map is empty on all processes, "
2166  "so all output PIDs & LIDs are invalid (-1)." << endl;
2167  std::cerr << os.str();
2168  }
2169  for (size_type k = 0; k < PIDs.size (); ++k) {
2170  PIDs[k] = OrdinalTraits<int>::invalid ();
2171  }
2172  for (size_type k = 0; k < LIDs.size (); ++k) {
2173  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2174  }
2175  return IDNotPresent;
2176  }
2177  }
2178 
2179  // getRemoteIndexList must be called collectively, and Directory
2180  // initialization is collective too, so it's OK to initialize the
2181  // Directory on demand.
2182 
2183  if (verbose) {
2184  std::ostringstream os;
2185  os << *prefix << "Call setupDirectory" << endl;
2186  std::cerr << os.str();
2187  }
2188  setupDirectory();
2189  if (verbose) {
2190  std::ostringstream os;
2191  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2192  std::cerr << os.str();
2193  }
2194  const Tpetra::LookupStatus retVal =
2195  directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2196  if (verbose) {
2197  std::ostringstream os;
2198  os << *prefix << "Done; getDirectoryEntries returned "
2199  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2200  << "; ";
2201  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2202  os << ", ";
2203  verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2204  os << endl;
2205  std::cerr << os.str();
2206  }
2207  return retVal;
2208  }
2209 
2210  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2211  LookupStatus
2213  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2214  const Teuchos::ArrayView<int> & PIDs) const
2215  {
2217  using std::endl;
2218 
2219  const bool verbose = Details::Behavior::verbose("Map");
2220  const size_t maxNumToPrint = verbose ?
2222  std::unique_ptr<std::string> prefix;
2223  if (verbose) {
2224  prefix = Details::createPrefix(comm_.getRawPtr(),
2225  "Map", "getRemoteIndexList(GIDs,PIDs)");
2226  std::ostringstream os;
2227  os << *prefix << "Start: ";
2228  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2229  os << endl;
2230  std::cerr << os.str();
2231  }
2232 
2233  if (getGlobalNumElements () == 0) {
2234  if (GIDs.size () == 0) {
2235  if (verbose) {
2236  std::ostringstream os;
2237  os << *prefix << "Done; both Map & input are empty" << endl;
2238  std::cerr << os.str();
2239  }
2240  return AllIDsPresent; // trivially
2241  }
2242  else {
2243  if (verbose) {
2244  std::ostringstream os;
2245  os << *prefix << "Done: Map is empty on all processes, "
2246  "so all output PIDs are invalid (-1)." << endl;
2247  std::cerr << os.str();
2248  }
2249  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2250  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2251  }
2252  return IDNotPresent;
2253  }
2254  }
2255 
2256  // getRemoteIndexList must be called collectively, and Directory
2257  // initialization is collective too, so it's OK to initialize the
2258  // Directory on demand.
2259 
2260  if (verbose) {
2261  std::ostringstream os;
2262  os << *prefix << "Call setupDirectory" << endl;
2263  std::cerr << os.str();
2264  }
2265  setupDirectory();
2266  if (verbose) {
2267  std::ostringstream os;
2268  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2269  std::cerr << os.str();
2270  }
2271  const Tpetra::LookupStatus retVal =
2272  directory_->getDirectoryEntries(*this, GIDs, PIDs);
2273  if (verbose) {
2274  std::ostringstream os;
2275  os << *prefix << "Done; getDirectoryEntries returned "
2276  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2277  << "; ";
2278  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2279  os << endl;
2280  std::cerr << os.str();
2281  }
2282  return retVal;
2283  }
2284 
2285  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2286  Teuchos::RCP<const Teuchos::Comm<int> >
2288  return comm_;
2289  }
2290 
2291  template <class LocalOrdinal,class GlobalOrdinal, class Node>
2292  bool
2294  checkIsDist() const
2295  {
2296  using Teuchos::as;
2297  using Teuchos::outArg;
2298  using Teuchos::REDUCE_MIN;
2299  using Teuchos::reduceAll;
2300  using std::endl;
2301 
2302  const bool verbose = Details::Behavior::verbose("Map");
2303  std::unique_ptr<std::string> prefix;
2304  if (verbose) {
2305  prefix = Details::createPrefix(
2306  comm_.getRawPtr(), "Map", "checkIsDist");
2307  std::ostringstream os;
2308  os << *prefix << "Start" << endl;
2309  std::cerr << os.str();
2310  }
2311 
2312  bool global = false;
2313  if (comm_->getSize () > 1) {
2314  // The communicator has more than one process, but that doesn't
2315  // necessarily mean the Map is distributed.
2316  int localRep = 0;
2317  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2318  // The number of local elements on this process equals the
2319  // number of global elements.
2320  //
2321  // NOTE (mfh 22 Nov 2011) Does this still work if there were
2322  // duplicates in the global ID list on input (the third Map
2323  // constructor), so that the number of local elements (which
2324  // are not duplicated) on this process could be less than the
2325  // number of global elements, even if this process owns all
2326  // the elements?
2327  localRep = 1;
2328  }
2329  int allLocalRep;
2330  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2331  if (allLocalRep != 1) {
2332  // At least one process does not own all the elements.
2333  // This makes the Map a distributed Map.
2334  global = true;
2335  }
2336  }
2337  // If the communicator has only one process, then the Map is not
2338  // distributed.
2339 
2340  if (verbose) {
2341  std::ostringstream os;
2342  os << *prefix << "Done; global=" << (global ? "true" : "false")
2343  << endl;
2344  std::cerr << os.str();
2345  }
2346  return global;
2347  }
2348 
2349 } // namespace Tpetra
2350 
2351 template <class LocalOrdinal, class GlobalOrdinal>
2352 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2353 Tpetra::createLocalMap (const size_t numElements,
2354  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2355 {
2356  typedef LocalOrdinal LO;
2357  typedef GlobalOrdinal GO;
2358  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2359  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2360 }
2361 
2362 template <class LocalOrdinal, class GlobalOrdinal>
2363 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2364 Tpetra::createUniformContigMap (const global_size_t numElements,
2365  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2366 {
2367  typedef LocalOrdinal LO;
2368  typedef GlobalOrdinal GO;
2369  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2370  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2371 }
2372 
2373 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2374 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2375 Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2376  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2377 )
2378 {
2379  using Teuchos::rcp;
2381  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2382 
2383  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2384 }
2385 
2386 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2387 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2388 Tpetra::createLocalMapWithNode (const size_t numElements,
2389  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2390 )
2391 {
2392  using Tpetra::global_size_t;
2393  using Teuchos::rcp;
2395  const GlobalOrdinal indexBase = 0;
2396  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2397 
2398  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2399 }
2400 
2401 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2402 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2404  const size_t localNumElements,
2405  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2406 )
2407 {
2408  using Teuchos::rcp;
2410  const GlobalOrdinal indexBase = 0;
2411 
2412  return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2413 }
2414 
2415 template <class LocalOrdinal, class GlobalOrdinal>
2416 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2418  const size_t localNumElements,
2419  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2420 {
2421  typedef LocalOrdinal LO;
2422  typedef GlobalOrdinal GO;
2423  using NT = typename Tpetra::Map<LO, GO>::node_type;
2424 
2425  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2426 }
2427 
2428 template <class LocalOrdinal, class GlobalOrdinal>
2429 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2430 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2431  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2432 {
2433  typedef LocalOrdinal LO;
2434  typedef GlobalOrdinal GO;
2435  using NT = typename Tpetra::Map<LO, GO>::node_type;
2436 
2437  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2438 }
2439 
2440 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2441 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2442 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2443  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2444 )
2445 {
2446  using Teuchos::rcp;
2448  using GST = Tpetra::global_size_t;
2449  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2450  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2451  // shouldn't be zero, given that the index base is supposed to equal
2452  // the globally min global index?
2453  const GlobalOrdinal indexBase = 0;
2454 
2455  return rcp (new map_type (INV, elementList, indexBase, comm));
2456 }
2457 
2458 template<class LO, class GO, class NT>
2459 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2460 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2461 {
2463  using Teuchos::Array;
2464  using Teuchos::ArrayView;
2465  using Teuchos::as;
2466  using Teuchos::rcp;
2467  using std::cerr;
2468  using std::endl;
2469  using map_type = Tpetra::Map<LO, GO, NT>;
2470  using GST = global_size_t;
2471 
2472  const bool verbose = Details::Behavior::verbose("Map");
2473  std::unique_ptr<std::string> prefix;
2474  if (verbose) {
2475  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2476  prefix = Details::createPrefix(
2477  comm.getRawPtr(), "createOneToOne(Map)");
2478  std::ostringstream os;
2479  os << *prefix << "Start" << endl;
2480  cerr << os.str();
2481  }
2482  const size_t maxNumToPrint = verbose ?
2483  Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2484  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2485  const int myRank = M->getComm ()->getRank ();
2486 
2487  // Bypasses for special cases where either M is known to be
2488  // one-to-one, or the one-to-one version of M is easy to compute.
2489  // This is why we take M as an RCP, not as a const reference -- so
2490  // that we can return M itself if it is 1-to-1.
2491  if (! M->isDistributed ()) {
2492  // For a locally replicated Map, we assume that users want to push
2493  // all the GIDs to Process 0.
2494 
2495  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2496  // you think it should return, in this special case of a locally
2497  // replicated contiguous Map.
2498  const GST numGlobalEntries = M->getGlobalNumElements ();
2499  if (M->isContiguous()) {
2500  const size_t numLocalEntries =
2501  (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2502  if (verbose) {
2503  std::ostringstream os;
2504  os << *prefix << "Input is locally replicated & contiguous; "
2505  "numLocalEntries=" << numLocalEntries << endl;
2506  cerr << os.str ();
2507  }
2508  auto retMap =
2509  rcp(new map_type(numGlobalEntries, numLocalEntries,
2510  M->getIndexBase(), M->getComm()));
2511  if (verbose) {
2512  std::ostringstream os;
2513  os << *prefix << "Done" << endl;
2514  cerr << os.str ();
2515  }
2516  return retMap;
2517  }
2518  else {
2519  if (verbose) {
2520  std::ostringstream os;
2521  os << *prefix << "Input is locally replicated & noncontiguous"
2522  << endl;
2523  cerr << os.str ();
2524  }
2525  ArrayView<const GO> myGids =
2526  (myRank == 0) ? M->getNodeElementList() : Teuchos::null;
2527  auto retMap =
2528  rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2529  M->getComm()));
2530  if (verbose) {
2531  std::ostringstream os;
2532  os << *prefix << "Done" << endl;
2533  cerr << os.str ();
2534  }
2535  return retMap;
2536  }
2537  }
2538  else if (M->isContiguous ()) {
2539  if (verbose) {
2540  std::ostringstream os;
2541  os << *prefix << "Input is distributed & contiguous" << endl;
2542  cerr << os.str ();
2543  }
2544  // Contiguous, distributed Maps are one-to-one by construction.
2545  // (Locally replicated Maps can be contiguous.)
2546  return M;
2547  }
2548  else {
2549  if (verbose) {
2550  std::ostringstream os;
2551  os << *prefix << "Input is distributed & noncontiguous" << endl;
2552  cerr << os.str ();
2553  }
2555  const size_t numMyElems = M->getNodeNumElements ();
2556  ArrayView<const GO> myElems = M->getNodeElementList ();
2557  Array<int> owner_procs_vec (numMyElems);
2558 
2559  if (verbose) {
2560  std::ostringstream os;
2561  os << *prefix << "Call Directory::getDirectoryEntries: ";
2562  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2563  os << endl;
2564  cerr << os.str();
2565  }
2566  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2567  if (verbose) {
2568  std::ostringstream os;
2569  os << *prefix << "getDirectoryEntries result: ";
2570  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2571  os << endl;
2572  cerr << os.str();
2573  }
2574 
2575  Array<GO> myOwned_vec (numMyElems);
2576  size_t numMyOwnedElems = 0;
2577  for (size_t i = 0; i < numMyElems; ++i) {
2578  const GO GID = myElems[i];
2579  const int owner = owner_procs_vec[i];
2580 
2581  if (myRank == owner) {
2582  myOwned_vec[numMyOwnedElems++] = GID;
2583  }
2584  }
2585  myOwned_vec.resize (numMyOwnedElems);
2586 
2587  if (verbose) {
2588  std::ostringstream os;
2589  os << *prefix << "Create Map: ";
2590  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2591  os << endl;
2592  cerr << os.str();
2593  }
2594  auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2595  M->getIndexBase(), M->getComm()));
2596  if (verbose) {
2597  std::ostringstream os;
2598  os << *prefix << "Done" << endl;
2599  cerr << os.str();
2600  }
2601  return retMap;
2602  }
2603 }
2604 
2605 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2606 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2609 {
2610  using Details::Behavior;
2612  using Teuchos::Array;
2613  using Teuchos::ArrayView;
2614  using Teuchos::RCP;
2615  using Teuchos::rcp;
2616  using Teuchos::toString;
2617  using std::cerr;
2618  using std::endl;
2619  using LO = LocalOrdinal;
2620  using GO = GlobalOrdinal;
2621  using map_type = Tpetra::Map<LO, GO, Node>;
2622 
2623  const bool verbose = Behavior::verbose("Map");
2624  std::unique_ptr<std::string> prefix;
2625  if (verbose) {
2626  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2627  prefix = Details::createPrefix(
2628  comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2629  std::ostringstream os;
2630  os << *prefix << "Start" << endl;
2631  cerr << os.str();
2632  }
2633  const size_t maxNumToPrint = verbose ?
2634  Behavior::verbosePrintCountThreshold() : size_t(0);
2635 
2636  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2637  // Maps (which are 1-to-1 by construction).
2638 
2640  if (verbose) {
2641  std::ostringstream os;
2642  os << *prefix << "Initialize Directory" << endl;
2643  cerr << os.str ();
2644  }
2645  directory.initialize (*M, tie_break);
2646  if (verbose) {
2647  std::ostringstream os;
2648  os << *prefix << "Done initializing Directory" << endl;
2649  cerr << os.str ();
2650  }
2651  size_t numMyElems = M->getNodeNumElements ();
2652  ArrayView<const GO> myElems = M->getNodeElementList ();
2653  Array<int> owner_procs_vec (numMyElems);
2654  if (verbose) {
2655  std::ostringstream os;
2656  os << *prefix << "Call Directory::getDirectoryEntries: ";
2657  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2658  os << endl;
2659  cerr << os.str();
2660  }
2661  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2662  if (verbose) {
2663  std::ostringstream os;
2664  os << *prefix << "getDirectoryEntries result: ";
2665  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2666  os << endl;
2667  cerr << os.str();
2668  }
2669 
2670  const int myRank = M->getComm()->getRank();
2671  Array<GO> myOwned_vec (numMyElems);
2672  size_t numMyOwnedElems = 0;
2673  for (size_t i = 0; i < numMyElems; ++i) {
2674  const GO GID = myElems[i];
2675  const int owner = owner_procs_vec[i];
2676  if (myRank == owner) {
2677  myOwned_vec[numMyOwnedElems++] = GID;
2678  }
2679  }
2680  myOwned_vec.resize (numMyOwnedElems);
2681 
2682  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2683  // valid for the new Map. Why can't we reuse it?
2684  const global_size_t GINV =
2685  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2686  if (verbose) {
2687  std::ostringstream os;
2688  os << *prefix << "Create Map: ";
2689  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2690  os << endl;
2691  cerr << os.str();
2692  }
2693  RCP<const map_type> retMap
2694  (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2695  M->getComm ()));
2696  if (verbose) {
2697  std::ostringstream os;
2698  os << *prefix << "Done" << endl;
2699  cerr << os.str();
2700  }
2701  return retMap;
2702 }
2703 
2704 //
2705 // Explicit instantiation macro
2706 //
2707 // Must be expanded from within the Tpetra namespace!
2708 //
2709 
2711 
2712 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2713  \
2714  template class Map< LO , GO , NODE >; \
2715  \
2716  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2717  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2718  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2719  \
2720  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2721  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2722  const size_t localNumElements, \
2723  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2724  \
2725  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2726  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2727  const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2728  \
2729  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2730  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2731  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2732  \
2733  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2734  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2735  \
2736  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2737  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2738  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2739 
2740 
2742 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2743  template Teuchos::RCP< const Map<LO,GO> > \
2744  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2745  \
2746  template Teuchos::RCP< const Map<LO,GO> > \
2747  createContigMap<LO,GO>( global_size_t, size_t, \
2748  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2749  \
2750  template Teuchos::RCP< const Map<LO,GO> > \
2751  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2752  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2753  \
2754  template Teuchos::RCP< const Map<LO,GO> > \
2755  createUniformContigMap<LO,GO>( const global_size_t, \
2756  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2757 
2758 #endif // TPETRA_MAP_DEF_HPP
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::initializeKokkos.
Declaration of Tpetra::Details::printOnce.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Interface for breaking ties in ownership.
Implement mapping from global ID to process ID and local ID.
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
void initialize(const map_type &map)
Initialize the Directory with its Map.
A parallel distribution of indices over processes.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
bool isOneToOne() const
Whether the Map is one to one.
std::string description() const
Implementation of Teuchos::Describable.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Node node_type
Legacy typedef that will go away at some point.
Map()
Default constructor (that does nothing).
GlobalOrdinal global_ordinal_type
The type of global indices.
Teuchos::ArrayView< const global_ordinal_type > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
LO local_ordinal_type
The type of local indices.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map's communicator with a subset communicator.
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
typename Node::device_type device_type
This class' Kokkos::Device specialization.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:64
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...