Tpetra parallel linear algebra  Version of the Day
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DISTOBJECT_DEF_HPP
41 #define TPETRA_DISTOBJECT_DEF_HPP
42 
50 
51 #include "Tpetra_Distributor.hpp"
54 #include "Tpetra_Details_checkGlobalError.hpp"
56 #include "Tpetra_Util.hpp" // Details::createPrefix
57 #include "Teuchos_CommHelpers.hpp"
58 #include "Teuchos_TypeNameTraits.hpp"
59 #include <typeinfo>
60 #include <memory>
61 #include <sstream>
62 
63 namespace Tpetra {
64 
65  namespace { // (anonymous)
66  template<class DeviceType, class IndexType = size_t>
67  struct SumFunctor {
68  SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
69  viewToSum_ (viewToSum) {}
70  KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
71  lclSum += viewToSum_(i);
72  }
73  Kokkos::View<const size_t*, DeviceType> viewToSum_;
74  };
75 
76  template<class DeviceType, class IndexType = size_t>
77  size_t
78  countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
79  {
80  using Kokkos::parallel_reduce;
81  typedef DeviceType DT;
82  typedef typename DT::execution_space DES;
83  typedef Kokkos::RangePolicy<DES, IndexType> range_type;
84 
85  const IndexType numOut = numImportPacketsPerLID.extent (0);
86  size_t totalImportPackets = 0;
87  parallel_reduce ("Count import packets",
88  range_type (0, numOut),
89  SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
90  totalImportPackets);
91  return totalImportPackets;
92  }
93  } // namespace (anonymous)
94 
95 
96  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
98  DistObject (const Teuchos::RCP<const map_type>& map) :
99  map_ (map)
100  {
101 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
102  using Teuchos::RCP;
103  using Teuchos::Time;
104  using Teuchos::TimeMonitor;
105 
106  RCP<Time> doXferTimer =
107  TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
108  if (doXferTimer.is_null ()) {
109  doXferTimer =
110  TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
111  }
112  doXferTimer_ = doXferTimer;
113 
114  RCP<Time> copyAndPermuteTimer =
115  TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
116  if (copyAndPermuteTimer.is_null ()) {
117  copyAndPermuteTimer =
118  TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
119  }
120  copyAndPermuteTimer_ = copyAndPermuteTimer;
121 
122  RCP<Time> packAndPrepareTimer =
123  TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
124  if (packAndPrepareTimer.is_null ()) {
125  packAndPrepareTimer =
126  TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
127  }
128  packAndPrepareTimer_ = packAndPrepareTimer;
129 
130  RCP<Time> doPostsAndWaitsTimer =
131  TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
132  if (doPostsAndWaitsTimer.is_null ()) {
133  doPostsAndWaitsTimer =
134  TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
135  }
136  doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
137 
138  RCP<Time> unpackAndCombineTimer =
139  TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
140  if (unpackAndCombineTimer.is_null ()) {
141  unpackAndCombineTimer =
142  TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
143  }
144  unpackAndCombineTimer_ = unpackAndCombineTimer;
145 #endif // HAVE_TPETRA_TRANSFER_TIMERS
146  }
147 
148  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
149  std::string
151  description () const
152  {
153  using Teuchos::TypeNameTraits;
154 
155  std::ostringstream os;
156  os << "\"Tpetra::DistObject\": {"
157  << "Packet: " << TypeNameTraits<packet_type>::name ()
158  << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
159  << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
160  << ", Node: " << TypeNameTraits<Node>::name ();
161  if (this->getObjectLabel () != "") {
162  os << "Label: \"" << this->getObjectLabel () << "\"";
163  }
164  os << "}";
165  return os.str ();
166  }
167 
168  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
169  void
171  describe (Teuchos::FancyOStream &out,
172  const Teuchos::EVerbosityLevel verbLevel) const
173  {
174  using Teuchos::rcpFromRef;
175  using Teuchos::TypeNameTraits;
176  using std::endl;
177  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
178  Teuchos::VERB_LOW : verbLevel;
179  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
180  const int myRank = comm.is_null () ? 0 : comm->getRank ();
181  const int numProcs = comm.is_null () ? 1 : comm->getSize ();
182 
183  if (vl != Teuchos::VERB_NONE) {
184  Teuchos::OSTab tab0 (out);
185  if (myRank == 0) {
186  out << "\"Tpetra::DistObject\":" << endl;
187  }
188  Teuchos::OSTab tab1 (out);
189  if (myRank == 0) {
190  out << "Template parameters:" << endl;
191  {
192  Teuchos::OSTab tab2 (out);
193  out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
194  << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
195  << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
196  << "Node: " << TypeNameTraits<node_type>::name () << endl;
197  }
198  if (this->getObjectLabel () != "") {
199  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
200  }
201  } // if myRank == 0
202 
203  // Describe the Map.
204  {
205  if (myRank == 0) {
206  out << "Map:" << endl;
207  }
208  Teuchos::OSTab tab2 (out);
209  map_->describe (out, vl);
210  }
211 
212  // At verbosity > VERB_LOW, each process prints something.
213  if (vl > Teuchos::VERB_LOW) {
214  for (int p = 0; p < numProcs; ++p) {
215  if (myRank == p) {
216  out << "Process " << myRank << ":" << endl;
217  Teuchos::OSTab tab2 (out);
218  out << "Export buffer size (in packets): "
219  << exports_.extent (0)
220  << endl
221  << "Import buffer size (in packets): "
222  << imports_.extent (0)
223  << endl;
224  }
225  if (! comm.is_null ()) {
226  comm->barrier (); // give output time to finish
227  comm->barrier ();
228  comm->barrier ();
229  }
230  } // for each process rank p
231  } // if vl > VERB_LOW
232  } // if vl != VERB_NONE
233  }
234 
235  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
236  void
238  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
239  {
240  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
241  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
242  }
243 
244  /* These are provided in base DistObject template
245  template<class DistObjectType>
246  void
247  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
248  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
249  typename DistObjectType::global_ordinal_type,
250  typename DistObjectType::node_type> >& newMap)
251  {
252  input->removeEmptyProcessesInPlace (newMap);
253  if (newMap.is_null ()) { // my process is excluded
254  input = Teuchos::null;
255  }
256  }
257 
258  template<class DistObjectType>
259  void
260  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
261  {
262  using Teuchos::RCP;
263  typedef typename DistObjectType::local_ordinal_type LO;
264  typedef typename DistObjectType::global_ordinal_type GO;
265  typedef typename DistObjectType::node_type NT;
266  typedef Map<LO, GO, NT> map_type;
267 
268  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
269  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
270  }
271  */
272 
273  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
274  void
276  doImport (const SrcDistObject& source,
278  const CombineMode CM,
279  const bool restrictedMode)
280  {
281  using Details::Behavior;
282  using std::endl;
283  const char modeString[] = "doImport (forward mode)";
284 
285  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
286  // output to std::cerr on every MPI process. This is unwise for
287  // runs with large numbers of MPI processes.
288  const bool verbose = Behavior::verbose("DistObject");
289  std::unique_ptr<std::string> prefix;
290  if (verbose) {
291  prefix = this->createPrefix("DistObject", modeString);
292  std::ostringstream os;
293  os << *prefix << "Start" << endl;
294  std::cerr << os.str ();
295  }
296  this->beginImport(source, importer, CM, restrictedMode);
297  this->endImport(source, importer, CM, restrictedMode);
298  if (verbose) {
299  std::ostringstream os;
300  os << *prefix << "Done" << endl;
301  std::cerr << os.str ();
302  }
303  }
304 
305  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
306  void
308  doExport (const SrcDistObject& source,
310  const CombineMode CM,
311  const bool restrictedMode)
312  {
313  using Details::Behavior;
314  using std::endl;
315  const char modeString[] = "doExport (forward mode)";
316 
317  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
318  // output to std::cerr on every MPI process. This is unwise for
319  // runs with large numbers of MPI processes.
320  const bool verbose = Behavior::verbose("DistObject");
321  std::unique_ptr<std::string> prefix;
322  if (verbose) {
323  prefix = this->createPrefix("DistObject", modeString);
324  std::ostringstream os;
325  os << *prefix << "Start" << endl;
326  std::cerr << os.str ();
327  }
328  this->beginExport(source, exporter, CM, restrictedMode);
329  this->endExport(source, exporter, CM, restrictedMode);
330  if (verbose) {
331  std::ostringstream os;
332  os << *prefix << "Done" << endl;
333  std::cerr << os.str ();
334  }
335  }
336 
337  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
338  void
340  doImport (const SrcDistObject& source,
342  const CombineMode CM,
343  const bool restrictedMode)
344  {
345  using Details::Behavior;
346  using std::endl;
347  const char modeString[] = "doImport (reverse mode)";
348 
349  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
350  // output to std::cerr on every MPI process. This is unwise for
351  // runs with large numbers of MPI processes.
352  const bool verbose = Behavior::verbose("DistObject");
353  std::unique_ptr<std::string> prefix;
354  if (verbose) {
355  prefix = this->createPrefix("DistObject", modeString);
356  std::ostringstream os;
357  os << *prefix << "Start" << endl;
358  std::cerr << os.str ();
359  }
360  this->beginImport(source, exporter, CM, restrictedMode);
361  this->endImport(source, exporter, CM, restrictedMode);
362  if (verbose) {
363  std::ostringstream os;
364  os << *prefix << "Done" << endl;
365  std::cerr << os.str ();
366  }
367  }
368 
369  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
370  void
372  doExport (const SrcDistObject& source,
374  const CombineMode CM,
375  const bool restrictedMode)
376  {
377  using Details::Behavior;
378  using std::endl;
379  const char modeString[] = "doExport (reverse mode)";
380 
381  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
382  // output to std::cerr on every MPI process. This is unwise for
383  // runs with large numbers of MPI processes.
384  const bool verbose = Behavior::verbose("DistObject");
385  std::unique_ptr<std::string> prefix;
386  if (verbose) {
387  prefix = this->createPrefix("DistObject", modeString);
388  std::ostringstream os;
389  os << *prefix << "Start" << endl;
390  std::cerr << os.str ();
391  }
392  this->beginExport(source, importer, CM, restrictedMode);
393  this->endExport(source, importer, CM, restrictedMode);
394  if (verbose) {
395  std::ostringstream os;
396  os << *prefix << "Done" << endl;
397  std::cerr << os.str ();
398  }
399  }
400 
401  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
402  void
404  beginImport(const SrcDistObject& source,
406  const CombineMode CM,
407  const bool restrictedMode)
408  {
409  using Details::Behavior;
410  using std::endl;
411  const char modeString[] = "doImport (forward mode)";
412 
413  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
414  // output to std::cerr on every MPI process. This is unwise for
415  // runs with large numbers of MPI processes.
416  const bool verbose = Behavior::verbose("DistObject");
417  std::unique_ptr<std::string> prefix;
418  if (verbose) {
419  prefix = this->createPrefix("DistObject", modeString);
420  std::ostringstream os;
421  os << *prefix << "Start" << endl;
422  std::cerr << os.str ();
423  }
424  this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
425  if (verbose) {
426  std::ostringstream os;
427  os << *prefix << "Done" << endl;
428  std::cerr << os.str ();
429  }
430  }
431 
432  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
433  void
434  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
435  beginExport(const SrcDistObject& source,
436  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
437  const CombineMode CM,
438  const bool restrictedMode)
439  {
440  using Details::Behavior;
441  using std::endl;
442  const char modeString[] = "doExport (forward mode)";
443 
444  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
445  // output to std::cerr on every MPI process. This is unwise for
446  // runs with large numbers of MPI processes.
447  const bool verbose = Behavior::verbose("DistObject");
448  std::unique_ptr<std::string> prefix;
449  if (verbose) {
450  prefix = this->createPrefix("DistObject", modeString);
451  std::ostringstream os;
452  os << *prefix << "Start" << endl;
453  std::cerr << os.str ();
454  }
455  this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
456  if (verbose) {
457  std::ostringstream os;
458  os << *prefix << "Done" << endl;
459  std::cerr << os.str ();
460  }
461  }
462 
463  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
464  void
465  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
466  beginImport(const SrcDistObject& source,
467  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
468  const CombineMode CM,
469  const bool restrictedMode)
470  {
471  using Details::Behavior;
472  using std::endl;
473  const char modeString[] = "doImport (reverse mode)";
474 
475  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
476  // output to std::cerr on every MPI process. This is unwise for
477  // runs with large numbers of MPI processes.
478  const bool verbose = Behavior::verbose("DistObject");
479  std::unique_ptr<std::string> prefix;
480  if (verbose) {
481  prefix = this->createPrefix("DistObject", modeString);
482  std::ostringstream os;
483  os << *prefix << "Start" << endl;
484  std::cerr << os.str ();
485  }
486  this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
487  if (verbose) {
488  std::ostringstream os;
489  os << *prefix << "Done" << endl;
490  std::cerr << os.str ();
491  }
492  }
493 
494  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
495  void
496  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
497  beginExport(const SrcDistObject& source,
498  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
499  const CombineMode CM,
500  const bool restrictedMode)
501  {
502  using Details::Behavior;
503  using std::endl;
504  const char modeString[] = "doExport (reverse mode)";
505 
506  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
507  // output to std::cerr on every MPI process. This is unwise for
508  // runs with large numbers of MPI processes.
509  const bool verbose = Behavior::verbose("DistObject");
510  std::unique_ptr<std::string> prefix;
511  if (verbose) {
512  prefix = this->createPrefix("DistObject", modeString);
513  std::ostringstream os;
514  os << *prefix << "Start" << endl;
515  std::cerr << os.str ();
516  }
517  this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
518  if (verbose) {
519  std::ostringstream os;
520  os << *prefix << "Done" << endl;
521  std::cerr << os.str ();
522  }
523  }
524 
525  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
526  void
527  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
528  endImport(const SrcDistObject& source,
529  const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
530  const CombineMode CM,
531  const bool restrictedMode)
532  {
533  using Details::Behavior;
534  using std::endl;
535  const char modeString[] = "doImport (forward mode)";
536 
537  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
538  // output to std::cerr on every MPI process. This is unwise for
539  // runs with large numbers of MPI processes.
540  const bool verbose = Behavior::verbose("DistObject");
541  std::unique_ptr<std::string> prefix;
542  if (verbose) {
543  prefix = this->createPrefix("DistObject", modeString);
544  std::ostringstream os;
545  os << *prefix << "Start" << endl;
546  std::cerr << os.str ();
547  }
548  this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
549  if (verbose) {
550  std::ostringstream os;
551  os << *prefix << "Done" << endl;
552  std::cerr << os.str ();
553  }
554  }
555 
556  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
557  void
558  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
559  endExport(const SrcDistObject& source,
560  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
561  const CombineMode CM,
562  const bool restrictedMode)
563  {
564  using Details::Behavior;
565  using std::endl;
566  const char modeString[] = "doExport (forward mode)";
567 
568  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
569  // output to std::cerr on every MPI process. This is unwise for
570  // runs with large numbers of MPI processes.
571  const bool verbose = Behavior::verbose("DistObject");
572  std::unique_ptr<std::string> prefix;
573  if (verbose) {
574  prefix = this->createPrefix("DistObject", modeString);
575  std::ostringstream os;
576  os << *prefix << "Start" << endl;
577  std::cerr << os.str ();
578  }
579  this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
580  if (verbose) {
581  std::ostringstream os;
582  os << *prefix << "Done" << endl;
583  std::cerr << os.str ();
584  }
585  }
586 
587  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
588  void
589  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
590  endImport(const SrcDistObject& source,
591  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
592  const CombineMode CM,
593  const bool restrictedMode)
594  {
595  using Details::Behavior;
596  using std::endl;
597  const char modeString[] = "doImport (reverse mode)";
598 
599  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
600  // output to std::cerr on every MPI process. This is unwise for
601  // runs with large numbers of MPI processes.
602  const bool verbose = Behavior::verbose("DistObject");
603  std::unique_ptr<std::string> prefix;
604  if (verbose) {
605  prefix = this->createPrefix("DistObject", modeString);
606  std::ostringstream os;
607  os << *prefix << "Start" << endl;
608  std::cerr << os.str ();
609  }
610  this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
611  if (verbose) {
612  std::ostringstream os;
613  os << *prefix << "Done" << endl;
614  std::cerr << os.str ();
615  }
616  }
617 
618  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
619  void
620  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
621  endExport(const SrcDistObject& source,
622  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
623  const CombineMode CM,
624  const bool restrictedMode)
625  {
626  using Details::Behavior;
627  using std::endl;
628  const char modeString[] = "doExport (reverse mode)";
629 
630  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
631  // output to std::cerr on every MPI process. This is unwise for
632  // runs with large numbers of MPI processes.
633  const bool verbose = Behavior::verbose("DistObject");
634  std::unique_ptr<std::string> prefix;
635  if (verbose) {
636  prefix = this->createPrefix("DistObject", modeString);
637  std::ostringstream os;
638  os << *prefix << "Start" << endl;
639  std::cerr << os.str ();
640  }
641  this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
642  if (verbose) {
643  std::ostringstream os;
644  os << *prefix << "Done" << endl;
645  std::cerr << os.str ();
646  }
647  }
648 
649  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
650  bool
652  isDistributed () const {
653  return map_->isDistributed ();
654  }
655 
656  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
657  size_t
659  constantNumberOfPackets () const {
660  return 0; // default implementation; subclasses may override
661  }
662 
663  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
664  void
667  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
668  const char modeString[],
669  const ReverseOption revOp,
670  const CombineMode CM,
671  bool restrictedMode)
672  {
673  beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
674  endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
675  }
676 
677  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
678  bool
680  reallocImportsIfNeeded (const size_t newSize,
681  const bool verbose,
682  const std::string* prefix,
683  const bool /*remoteLIDsContiguous*/,
684  const CombineMode /*CM*/)
685  {
686  if (verbose) {
687  std::ostringstream os;
688  os << *prefix << "Realloc (if needed) imports_ from "
689  << imports_.extent (0) << " to " << newSize << std::endl;
690  std::cerr << os.str ();
691  }
693  const bool reallocated =
694  reallocDualViewIfNeeded (this->imports_, newSize, "imports");
695  if (verbose) {
696  std::ostringstream os;
697  os << *prefix << "Finished realloc'ing imports_" << std::endl;
698  std::cerr << os.str ();
699  }
700  return reallocated;
701  }
702 
703  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
704  bool
706  reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
707  const size_t numImportLIDs)
708  {
709  using Details::Behavior;
712  using std::endl;
713  // If an array is already allocated, and if is at least
714  // tooBigFactor times bigger than it needs to be, free it and
715  // reallocate to the size we need, in order to save space.
716  // Otherwise, take subviews to reduce allocation size.
717  constexpr size_t tooBigFactor = 10;
718 
719  const bool verbose = Behavior::verbose("DistObject");
720  std::unique_ptr<std::string> prefix;
721  if (verbose) {
722  prefix = this->createPrefix("DistObject",
723  "reallocArraysForNumPacketsPerLid");
724  std::ostringstream os;
725  os << *prefix
726  << "numExportLIDs: " << numExportLIDs
727  << ", numImportLIDs: " << numImportLIDs
728  << endl;
729  os << *prefix << "DualView status before:" << endl
730  << *prefix
731  << dualViewStatusToString (this->numExportPacketsPerLID_,
732  "numExportPacketsPerLID_")
733  << endl
734  << *prefix
735  << dualViewStatusToString (this->numImportPacketsPerLID_,
736  "numImportPacketsPerLID_")
737  << endl;
738  std::cerr << os.str ();
739  }
740 
741  // Reallocate numExportPacketsPerLID_ if needed.
742  const bool firstReallocated =
743  reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
744  numExportLIDs,
745  "numExportPacketsPerLID",
746  tooBigFactor,
747  true); // need fence before, if realloc'ing
748 
749  // If we reallocated above, then we fenced after that
750  // reallocation. This means that we don't need to fence again,
751  // before the next reallocation.
752  const bool needFenceBeforeNextAlloc = ! firstReallocated;
753  const bool secondReallocated =
754  reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
755  numImportLIDs,
756  "numImportPacketsPerLID",
757  tooBigFactor,
758  needFenceBeforeNextAlloc);
759 
760  if (verbose) {
761  std::ostringstream os;
762  os << *prefix << "DualView status after:" << endl
763  << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
764  "numExportPacketsPerLID_")
765  << endl
766  << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
767  "numImportPacketsPerLID_")
768  << endl;
769  std::cerr << os.str ();
770  }
771 
772  return firstReallocated || secondReallocated;
773  }
774 
775  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
776  void
778  beginTransfer(const SrcDistObject& src,
779  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
780  const char modeString[],
781  const ReverseOption revOp,
782  const CombineMode CM,
783  bool restrictedMode)
784  {
785  using Details::Behavior;
789  using Kokkos::Compat::getArrayView;
790  using Kokkos::Compat::getConstArrayView;
791  using Kokkos::Compat::getKokkosViewDeepCopy;
792  using Kokkos::Compat::create_const_view;
793  using std::endl;
796  const char funcName[] = "Tpetra::DistObject::doTransfer";
797 
798  ProfilingRegion region_doTransfer(funcName);
799  const bool verbose = Behavior::verbose("DistObject");
800  std::shared_ptr<std::string> prefix;
801  if (verbose) {
802  std::ostringstream os;
803  prefix = this->createPrefix("DistObject", "doTransfer");
804  os << *prefix << "Source type: " << Teuchos::typeName(src)
805  << ", Target type: " << Teuchos::typeName(*this) << endl;
806  std::cerr << os.str();
807  }
808 
809  // "Restricted Mode" does two things:
810  // 1) Skips copyAndPermute
811  // 2) Allows the "target" Map of the transfer to be a subset of
812  // the Map of *this, in a "locallyFitted" sense.
813  //
814  // This cannot be used if #2 is not true, OR there are permutes.
815  // Source Maps still need to match
816 
817  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
818  // checks. These may communicate more.
819  const bool debug = Behavior::debug("DistObject");
820  if (debug) {
821  if (! restrictedMode && revOp == DoForward) {
822  const bool myMapSameAsTransferTgtMap =
823  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
824  TEUCHOS_TEST_FOR_EXCEPTION
825  (! myMapSameAsTransferTgtMap, std::invalid_argument,
826  "Tpetra::DistObject::" << modeString << ": For forward-mode "
827  "communication, the target DistObject's Map must be the same "
828  "(in the sense of Tpetra::Map::isSameAs) as the input "
829  "Export/Import object's target Map.");
830  }
831  else if (! restrictedMode && revOp == DoReverse) {
832  const bool myMapSameAsTransferSrcMap =
833  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
834  TEUCHOS_TEST_FOR_EXCEPTION
835  (! myMapSameAsTransferSrcMap, std::invalid_argument,
836  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
837  "communication, the target DistObject's Map must be the same "
838  "(in the sense of Tpetra::Map::isSameAs) as the input "
839  "Export/Import object's source Map.");
840  }
841  else if (restrictedMode && revOp == DoForward) {
842  const bool myMapLocallyFittedTransferTgtMap =
843  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
844  TEUCHOS_TEST_FOR_EXCEPTION
845  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
846  "Tpetra::DistObject::" << modeString << ": For forward-mode "
847  "communication using restricted mode, Export/Import object's "
848  "target Map must be locally fitted (in the sense of "
849  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
850  }
851  else { // if (restrictedMode && revOp == DoReverse)
852  const bool myMapLocallyFittedTransferSrcMap =
853  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
854  TEUCHOS_TEST_FOR_EXCEPTION
855  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
856  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
857  "communication using restricted mode, Export/Import object's "
858  "source Map must be locally fitted (in the sense of "
859  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
860  }
861 
862  // SrcDistObject need not even _have_ Maps. However, if the
863  // source object is a DistObject, it has a Map, and we may
864  // compare that Map with the Transfer's Maps.
865  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
866  if (srcDistObj != nullptr) {
867  if (revOp == DoForward) {
868  const bool srcMapSameAsImportSrcMap =
869  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
870  TEUCHOS_TEST_FOR_EXCEPTION
871  (! srcMapSameAsImportSrcMap, std::invalid_argument,
872  "Tpetra::DistObject::" << modeString << ": For forward-mode "
873  "communication, the source DistObject's Map must be the same "
874  "as the input Export/Import object's source Map.");
875  }
876  else { // revOp == DoReverse
877  const bool srcMapSameAsImportTgtMap =
878  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
879  TEUCHOS_TEST_FOR_EXCEPTION
880  (! srcMapSameAsImportTgtMap, std::invalid_argument,
881  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
882  "communication, the source DistObject's Map must be the same "
883  "as the input Export/Import object's target Map.");
884  }
885  }
886  }
887 
888  const size_t numSameIDs = transfer.getNumSameIDs ();
889  Distributor& distor = transfer.getDistributor ();
890  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
891 
892  TEUCHOS_TEST_FOR_EXCEPTION
893  (debug && restrictedMode &&
894  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
895  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
896  std::invalid_argument,
897  "Tpetra::DistObject::" << modeString << ": Transfer object "
898  "cannot have permutes in restricted mode.");
899 
900  // Do we need all communication buffers to live on host?
901  const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
902  if (verbose) {
903  std::ostringstream os;
904  os << *prefix << "doTransfer: Use new interface; "
905  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
906  std::cerr << os.str ();
907  }
908 
909  using const_lo_dv_type =
910  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
911  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
912  transfer.getPermuteToLIDs_dv () :
913  transfer.getPermuteFromLIDs_dv ();
914  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
915  transfer.getPermuteFromLIDs_dv () :
916  transfer.getPermuteToLIDs_dv ();
917  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
918  transfer.getRemoteLIDs_dv () :
919  transfer.getExportLIDs_dv ();
920  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
921  transfer.getExportLIDs_dv () :
922  transfer.getRemoteLIDs_dv ();
923  const bool canTryAliasing = (revOp == DoForward) ?
924  transfer.areRemoteLIDsContiguous() :
925  transfer.areExportLIDsContiguous();
926  // const bool canTryAliasing = false;
927 
928  ProfilingRegion region_dTN(funcName);
929 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
930  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
931  // of Kokkos profiling.
932  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
933 #endif // HAVE_TPETRA_TRANSFER_TIMERS
934 
935  if (verbose) {
936  std::ostringstream os;
937  os << *prefix << "Input arguments:" << endl
938  << *prefix << " combineMode: " << combineModeToString (CM) << endl
939  << *prefix << " numSameIDs: " << numSameIDs << endl
940  << *prefix << " "
941  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
942  << *prefix << " "
943  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
944  << *prefix << " "
945  << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
946  << *prefix << " "
947  << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
948  << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
949  << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
950  std::cerr << os.str ();
951  }
952 
953  {
954  ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
955  if (verbose) {
956  std::ostringstream os;
957  os << *prefix << "1. checkSizes" << endl;
958  std::cerr << os.str ();
959  }
960  const bool checkSizesResult = this->checkSizes (src);
961  TEUCHOS_TEST_FOR_EXCEPTION
962  (! checkSizesResult, std::invalid_argument,
963  "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
964  "destination object is not a legal target for redistribution from the "
965  "source object. This probably means that they do not have the same "
966  "dimensions. For example, MultiVectors must have the same number of "
967  "rows and columns.");
968  }
969 
970  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
971  // that if CM == INSERT || CM == REPLACE, the target object could
972  // be write only. We don't optimize for that here.
973 
974  if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
975  // There is at least one GID to copy or permute.
976  if (verbose) {
977  std::ostringstream os;
978  os << *prefix << "2. copyAndPermute" << endl;
979  std::cerr << os.str ();
980  }
981  ProfilingRegion region_cp
982  ("Tpetra::DistObject::doTransferNew::copyAndPermute");
983 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
984  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
985  // of Kokkos profiling.
986  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
987 #endif // HAVE_TPETRA_TRANSFER_TIMERS
988 
989  if (numSameIDs + permuteToLIDs.extent (0) != 0) {
990  // There is at least one GID to copy or permute.
991  if (verbose) {
992  std::ostringstream os;
993  os << *prefix << "2. copyAndPermute" << endl;
994  std::cerr << os.str ();
995  }
996  this->copyAndPermute (src, numSameIDs, permuteToLIDs,
997  permuteFromLIDs, CM);
998  if (verbose) {
999  std::ostringstream os;
1000  os << *prefix << "After copyAndPermute:" << endl
1001  << *prefix << " "
1002  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1003  << endl
1004  << *prefix << " "
1005  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1006  << endl;
1007  std::cerr << os.str ();
1008  }
1009  }
1010  }
1011 
1012  // The method may return zero even if the implementation actually
1013  // does have a constant number of packets per LID. However, if it
1014  // returns nonzero, we may use this information to avoid
1015  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
1016  // will set this to its final value.
1017  //
1018  // We only need this if CM != ZERO, but it has to be lifted out of
1019  // that scope because there are multiple tests for CM != ZERO.
1020  size_t constantNumPackets = this->constantNumberOfPackets ();
1021  if (verbose) {
1022  std::ostringstream os;
1023  os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1024  std::cerr << os.str ();
1025  }
1026 
1027  // We only need to pack communication buffers if the combine mode
1028  // is not ZERO. A "ZERO combine mode" means that the results are
1029  // the same as if we had received all zeros, and added them to the
1030  // existing values. That means we don't need to communicate.
1031  if (CM != ZERO) {
1032  if (constantNumPackets == 0) {
1033  if (verbose) {
1034  std::ostringstream os;
1035  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1036  << endl;
1037  std::cerr << os.str ();
1038  }
1039  // This only reallocates if necessary, that is, if the sizes
1040  // don't match.
1041  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1042  remoteLIDs.extent (0));
1043  }
1044 
1045  if (verbose) {
1046  std::ostringstream os;
1047  os << *prefix << "4. packAndPrepare: before, "
1048  << dualViewStatusToString (this->exports_, "exports_")
1049  << endl;
1050  std::cerr << os.str ();
1051  }
1052 
1053  doPackAndPrepare(src, exportLIDs, constantNumPackets);
1054  if (commOnHost) {
1055  this->exports_.sync_host();
1056  }
1057  else {
1058  this->exports_.sync_device();
1059  }
1060 
1061  if (verbose) {
1062  std::ostringstream os;
1063  os << *prefix << "5.1. After packAndPrepare, "
1064  << dualViewStatusToString (this->exports_, "exports_")
1065  << endl;
1066  std::cerr << os.str ();
1067  }
1068  } // if (CM != ZERO)
1069 
1070  // We only need to send data if the combine mode is not ZERO.
1071  if (CM != ZERO) {
1072  if (constantNumPackets != 0) {
1073  // There are a constant number of packets per element. We
1074  // already know (from the number of "remote" (incoming)
1075  // elements) how many incoming elements we expect, so we can
1076  // resize the buffer accordingly.
1077  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1078  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1079  }
1080 
1081  // Do we need to do communication (via doPostsAndWaits)?
1082  bool needCommunication = true;
1083 
1084  // This may be NULL. It will be used below.
1085  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1086 
1087  if (revOp == DoReverse && ! this->isDistributed ()) {
1088  needCommunication = false;
1089  }
1090  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1091  // is distributed requires a cast to DistObject. If it's not a
1092  // DistObject, then I'm not quite sure what to do. Perhaps it
1093  // would be more appropriate for SrcDistObject to have an
1094  // isDistributed() method. For now, I'll just assume that we
1095  // need to do communication unless the cast succeeds and the
1096  // source is not distributed.
1097  else if (revOp == DoForward && srcDistObj != NULL &&
1098  ! srcDistObj->isDistributed ()) {
1099  needCommunication = false;
1100  }
1101 
1102  if (! needCommunication) {
1103  if (verbose) {
1104  std::ostringstream os;
1105  os << *prefix << "Comm not needed; skipping" << endl;
1106  std::cerr << os.str ();
1107  }
1108  }
1109  else {
1110  ProfilingRegion region_dpw
1111  ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1112 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1113  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1114  // favor of Kokkos profiling.
1115  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1116 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1117 
1118  if (verbose) {
1119  std::ostringstream os;
1120  os << *prefix << "7.0. "
1121  << (revOp == DoReverse ? "Reverse" : "Forward")
1122  << " mode" << endl;
1123  std::cerr << os.str ();
1124  }
1125 
1126  doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1127  } // if (needCommunication)
1128  } // if (CM != ZERO)
1129  }
1130 
1131  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1132  void
1133  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1134  endTransfer(const SrcDistObject& src,
1135  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1136  const char modeString[],
1137  const ReverseOption revOp,
1138  const CombineMode CM,
1139  bool restrictedMode)
1140  {
1141  using Details::Behavior;
1144  using Details::ProfilingRegion;
1145  using Kokkos::Compat::getArrayView;
1146  using Kokkos::Compat::getConstArrayView;
1147  using Kokkos::Compat::getKokkosViewDeepCopy;
1148  using Kokkos::Compat::create_const_view;
1149  using std::endl;
1151  using Details::ProfilingRegion;
1152  const char funcName[] = "Tpetra::DistObject::doTransfer";
1153 
1154  ProfilingRegion region_doTransfer(funcName);
1155  const bool verbose = Behavior::verbose("DistObject");
1156  std::shared_ptr<std::string> prefix;
1157  if (verbose) {
1158  std::ostringstream os;
1159  prefix = this->createPrefix("DistObject", "doTransfer");
1160  os << *prefix << "Source type: " << Teuchos::typeName(src)
1161  << ", Target type: " << Teuchos::typeName(*this) << endl;
1162  std::cerr << os.str();
1163  }
1164 
1165  // "Restricted Mode" does two things:
1166  // 1) Skips copyAndPermute
1167  // 2) Allows the "target" Map of the transfer to be a subset of
1168  // the Map of *this, in a "locallyFitted" sense.
1169  //
1170  // This cannot be used if #2 is not true, OR there are permutes.
1171  // Source Maps still need to match
1172 
1173  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1174  // checks. These may communicate more.
1175  const bool debug = Behavior::debug("DistObject");
1176  if (debug) {
1177  if (! restrictedMode && revOp == DoForward) {
1178  const bool myMapSameAsTransferTgtMap =
1179  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1180  TEUCHOS_TEST_FOR_EXCEPTION
1181  (! myMapSameAsTransferTgtMap, std::invalid_argument,
1182  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1183  "communication, the target DistObject's Map must be the same "
1184  "(in the sense of Tpetra::Map::isSameAs) as the input "
1185  "Export/Import object's target Map.");
1186  }
1187  else if (! restrictedMode && revOp == DoReverse) {
1188  const bool myMapSameAsTransferSrcMap =
1189  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1190  TEUCHOS_TEST_FOR_EXCEPTION
1191  (! myMapSameAsTransferSrcMap, std::invalid_argument,
1192  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1193  "communication, the target DistObject's Map must be the same "
1194  "(in the sense of Tpetra::Map::isSameAs) as the input "
1195  "Export/Import object's source Map.");
1196  }
1197  else if (restrictedMode && revOp == DoForward) {
1198  const bool myMapLocallyFittedTransferTgtMap =
1199  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1200  TEUCHOS_TEST_FOR_EXCEPTION
1201  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1202  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1203  "communication using restricted mode, Export/Import object's "
1204  "target Map must be locally fitted (in the sense of "
1205  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1206  }
1207  else { // if (restrictedMode && revOp == DoReverse)
1208  const bool myMapLocallyFittedTransferSrcMap =
1209  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1210  TEUCHOS_TEST_FOR_EXCEPTION
1211  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1212  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1213  "communication using restricted mode, Export/Import object's "
1214  "source Map must be locally fitted (in the sense of "
1215  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1216  }
1217 
1218  // SrcDistObject need not even _have_ Maps. However, if the
1219  // source object is a DistObject, it has a Map, and we may
1220  // compare that Map with the Transfer's Maps.
1221  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1222  if (srcDistObj != nullptr) {
1223  if (revOp == DoForward) {
1224  const bool srcMapSameAsImportSrcMap =
1225  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1226  TEUCHOS_TEST_FOR_EXCEPTION
1227  (! srcMapSameAsImportSrcMap, std::invalid_argument,
1228  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1229  "communication, the source DistObject's Map must be the same "
1230  "as the input Export/Import object's source Map.");
1231  }
1232  else { // revOp == DoReverse
1233  const bool srcMapSameAsImportTgtMap =
1234  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1235  TEUCHOS_TEST_FOR_EXCEPTION
1236  (! srcMapSameAsImportTgtMap, std::invalid_argument,
1237  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1238  "communication, the source DistObject's Map must be the same "
1239  "as the input Export/Import object's target Map.");
1240  }
1241  }
1242  }
1243 
1244  Distributor& distor = transfer.getDistributor ();
1245  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1246 
1247  TEUCHOS_TEST_FOR_EXCEPTION
1248  (debug && restrictedMode &&
1249  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1250  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1251  std::invalid_argument,
1252  "Tpetra::DistObject::" << modeString << ": Transfer object "
1253  "cannot have permutes in restricted mode.");
1254 
1255  // Do we need all communication buffers to live on host?
1256  const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
1257  if (verbose) {
1258  std::ostringstream os;
1259  os << *prefix << "doTransfer: Use new interface; "
1260  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1261  std::cerr << os.str ();
1262  }
1263 
1264  using const_lo_dv_type =
1265  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1266  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1267  transfer.getPermuteToLIDs_dv () :
1268  transfer.getPermuteFromLIDs_dv ();
1269  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1270  transfer.getPermuteFromLIDs_dv () :
1271  transfer.getPermuteToLIDs_dv ();
1272  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1273  transfer.getRemoteLIDs_dv () :
1274  transfer.getExportLIDs_dv ();
1275  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1276  transfer.getExportLIDs_dv () :
1277  transfer.getRemoteLIDs_dv ();
1278  const bool canTryAliasing = (revOp == DoForward) ?
1279  transfer.areRemoteLIDsContiguous() :
1280  transfer.areExportLIDsContiguous();
1281 
1282  size_t constantNumPackets = this->constantNumberOfPackets ();
1283 
1284  // We only need to send data if the combine mode is not ZERO.
1285  if (CM != ZERO) {
1286  if (constantNumPackets != 0) {
1287  // There are a constant number of packets per element. We
1288  // already know (from the number of "remote" (incoming)
1289  // elements) how many incoming elements we expect, so we can
1290  // resize the buffer accordingly.
1291  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1292  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1293  }
1294 
1295  // Do we need to do communication (via doPostsAndWaits)?
1296  bool needCommunication = true;
1297 
1298  // This may be NULL. It will be used below.
1299  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1300 
1301  if (revOp == DoReverse && ! this->isDistributed ()) {
1302  needCommunication = false;
1303  }
1304  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1305  // is distributed requires a cast to DistObject. If it's not a
1306  // DistObject, then I'm not quite sure what to do. Perhaps it
1307  // would be more appropriate for SrcDistObject to have an
1308  // isDistributed() method. For now, I'll just assume that we
1309  // need to do communication unless the cast succeeds and the
1310  // source is not distributed.
1311  else if (revOp == DoForward && srcDistObj != NULL &&
1312  ! srcDistObj->isDistributed ()) {
1313  needCommunication = false;
1314  }
1315 
1316  if (! needCommunication) {
1317  if (verbose) {
1318  std::ostringstream os;
1319  os << *prefix << "Comm not needed; skipping" << endl;
1320  std::cerr << os.str ();
1321  }
1322  }
1323  else {
1324  distributorActor_.doWaits(distributorPlan);
1325 
1326  if (verbose) {
1327  std::ostringstream os;
1328  os << *prefix << "8. unpackAndCombine" << endl;
1329  std::cerr << os.str ();
1330  }
1331  doUnpackAndCombine(remoteLIDs, constantNumPackets, CM);
1332  } // if (needCommunication)
1333  } // if (CM != ZERO)
1334 
1335  if (verbose) {
1336  std::ostringstream os;
1337  os << *prefix << "9. Done!" << endl;
1338  std::cerr << os.str ();
1339  }
1340 
1341  if (verbose) {
1342  std::ostringstream os;
1343  os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1344  std::cerr << os.str ();
1345  }
1346  }
1347 
1348  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1349  void
1350  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1351  doPosts(const Details::DistributorPlan& distributorPlan,
1352  size_t constantNumPackets,
1353  bool commOnHost,
1354  std::shared_ptr<std::string> prefix,
1355  const bool canTryAliasing,
1356  const CombineMode CM)
1357  {
1360  using Kokkos::Compat::create_const_view;
1361  using std::endl;
1362 
1363  const bool verbose = Details::Behavior::verbose("DistObject");
1364 
1365  if (constantNumPackets == 0) { // variable num packets per LID
1366  if (verbose) {
1367  std::ostringstream os;
1368  os << *prefix << "7.1. Variable # packets / LID: first comm "
1369  << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1370  << endl;
1371  std::cerr << os.str ();
1372  }
1373  size_t totalImportPackets = 0;
1374  if (commOnHost) {
1375  if (this->numExportPacketsPerLID_.need_sync_host ()) {
1376  this->numExportPacketsPerLID_.sync_host ();
1377  }
1378  if (this->numImportPacketsPerLID_.need_sync_host ()) {
1379  this->numImportPacketsPerLID_.sync_host ();
1380  }
1381  this->numImportPacketsPerLID_.modify_host (); // out arg
1382  auto numExp_h =
1383  create_const_view (this->numExportPacketsPerLID_.view_host ());
1384  auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1385 
1386  // MPI communication happens here.
1387  if (verbose) {
1388  std::ostringstream os;
1389  os << *prefix << "Call doPostsAndWaits"
1390  << endl;
1391  std::cerr << os.str ();
1392  }
1393  distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1394 
1395  if (verbose) {
1396  std::ostringstream os;
1397  os << *prefix << "Count totalImportPackets" << std::endl;
1398  std::cerr << os.str ();
1399  }
1400  using the_dev_type = typename decltype (numImp_h)::device_type;
1401  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1402  }
1403  else { // ! commOnHost
1404  this->numExportPacketsPerLID_.sync_device ();
1405  this->numImportPacketsPerLID_.sync_device ();
1406  this->numImportPacketsPerLID_.modify_device (); // out arg
1407  auto numExp_d = create_const_view
1408  (this->numExportPacketsPerLID_.view_device ());
1409  auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1410 
1411  // MPI communication happens here.
1412  if (verbose) {
1413  std::ostringstream os;
1414  os << *prefix << "Call doPostsAndWaits"
1415  << endl;
1416  std::cerr << os.str ();
1417  }
1418  distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1419 
1420  if (verbose) {
1421  std::ostringstream os;
1422  os << *prefix << "Count totalImportPackets" << std::endl;
1423  std::cerr << os.str ();
1424  }
1425  using the_dev_type = typename decltype (numImp_d)::device_type;
1426  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1427  }
1428 
1429  if (verbose) {
1430  std::ostringstream os;
1431  os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1432  std::cerr << os.str ();
1433  }
1434  this->reallocImportsIfNeeded (totalImportPackets, verbose,
1435  prefix.get (), canTryAliasing, CM);
1436  if (verbose) {
1437  std::ostringstream os;
1438  os << *prefix << "7.3. Second comm" << std::endl;
1439  std::cerr << os.str ();
1440  }
1441 
1442  // mfh 04 Feb 2019: Distributor expects the "num packets per
1443  // LID" arrays on host, so that it can issue MPI sends and
1444  // receives correctly.
1445  this->numExportPacketsPerLID_.sync_host ();
1446  this->numImportPacketsPerLID_.sync_host ();
1447 
1448  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1449  // doReversePostsAndWaits currently want
1450  // numExportPacketsPerLID and numImportPacketsPerLID as
1451  // Teuchos::ArrayView, rather than as Kokkos::View.
1452  //
1453  // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1454  // device. The above syncs might.
1455  auto numExportPacketsPerLID_av =
1456  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1457  auto numImportPacketsPerLID_av =
1458  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1459 
1460  // imports_ is for output only, so we don't need to sync it
1461  // before marking it as modified. However, in order to
1462  // prevent spurious debug-mode errors (e.g., "modified on
1463  // both device and host"), we first need to clear its
1464  // "modified" flags.
1465  this->imports_.clear_sync_state ();
1466 
1467  if (verbose) {
1468  std::ostringstream os;
1469  os << *prefix << "Comm on "
1470  << (commOnHost ? "host" : "device")
1471  << "; call doPosts" << endl;
1472  std::cerr << os.str ();
1473  }
1474 
1475  if (commOnHost) {
1476  this->imports_.modify_host ();
1477  distributorActor_.doPosts
1478  (distributorPlan,
1479  create_const_view (this->exports_.view_host ()),
1480  numExportPacketsPerLID_av,
1481  this->imports_.view_host (),
1482  numImportPacketsPerLID_av);
1483  }
1484  else { // pack on device
1485  Kokkos::fence(); // for UVM
1486  this->imports_.modify_device ();
1487  distributorActor_.doPosts
1488  (distributorPlan,
1489  create_const_view (this->exports_.view_device ()),
1490  numExportPacketsPerLID_av,
1491  this->imports_.view_device (),
1492  numImportPacketsPerLID_av);
1493  }
1494  }
1495  else { // constant number of packets per LID
1496  if (verbose) {
1497  std::ostringstream os;
1498  os << *prefix << "7.1. Const # packets per LID: " << endl
1499  << *prefix << " "
1500  << dualViewStatusToString (this->exports_, "exports_")
1501  << endl
1502  << *prefix << " "
1503  << dualViewStatusToString (this->exports_, "imports_")
1504  << endl;
1505  std::cerr << os.str ();
1506  }
1507  // imports_ is for output only, so we don't need to sync it
1508  // before marking it as modified. However, in order to
1509  // prevent spurious debug-mode errors (e.g., "modified on
1510  // both device and host"), we first need to clear its
1511  // "modified" flags.
1512  this->imports_.clear_sync_state ();
1513 
1514  if (verbose) {
1515  std::ostringstream os;
1516  os << *prefix << "7.2. Comm on "
1517  << (commOnHost ? "host" : "device")
1518  << "; call doPosts" << endl;
1519  std::cerr << os.str ();
1520  }
1521  if (commOnHost) {
1522  this->imports_.modify_host ();
1523  distributorActor_.doPosts
1524  (distributorPlan,
1525  create_const_view (this->exports_.view_host ()),
1526  constantNumPackets,
1527  this->imports_.view_host ());
1528  }
1529  else { // pack on device
1530  Kokkos::fence(); // for UVM
1531  this->imports_.modify_device ();
1532  distributorActor_.doPosts
1533  (distributorPlan,
1534  create_const_view (this->exports_.view_device ()),
1535  constantNumPackets,
1536  this->imports_.view_device ());
1537  } // commOnHost
1538  } // constant or variable num packets per LID
1539  }
1540 
1541  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1542  void
1543  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1544  doPackAndPrepare(const SrcDistObject& src,
1545  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1546  size_t& constantNumPackets)
1547  {
1548  using Details::ProfilingRegion;
1549  using std::endl;
1550  const bool debug = Details::Behavior::debug("DistObject");
1551 
1552  ProfilingRegion region_pp
1553  ("Tpetra::DistObject::doTransferNew::packAndPrepare");
1554 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1555  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1556  // favor of Kokkos profiling.
1557  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1558 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1559 
1560  // Ask the source to pack data. Also ask it whether there are
1561  // a constant number of packets per element
1562  // (constantNumPackets is an output argument). If there are,
1563  // constantNumPackets will come back nonzero. Otherwise, the
1564  // source will fill the numExportPacketsPerLID_ array.
1565 
1566  // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1567  // Alternately, make packAndPrepare take a "commOnHost"
1568  // argument to tell it where to leave the data?
1569  //
1570  // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1571  // the freedom to pack and unpack either on host or device.
1572  // We should prefer sync'ing only on demand. Thus, we can
1573  // answer the above question: packAndPrepare should not
1574  // take a commOnHost argument, and doTransferNew should sync
1575  // where needed, if needed.
1576  if (debug) {
1577  std::ostringstream lclErrStrm;
1578  bool lclSuccess = false;
1579  try {
1580  this->packAndPrepare (src, exportLIDs, this->exports_,
1581  this->numExportPacketsPerLID_,
1582  constantNumPackets);
1583  lclSuccess = true;
1584  }
1585  catch (std::exception& e) {
1586  lclErrStrm << "packAndPrepare threw an exception: "
1587  << endl << e.what();
1588  }
1589  catch (...) {
1590  lclErrStrm << "packAndPrepare threw an exception "
1591  "not a subclass of std::exception.";
1592  }
1593  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1594  "threw an exception in packAndPrepare on "
1595  "one or more processes in the DistObject's communicator.";
1596  auto comm = getMap()->getComm();
1597  Details::checkGlobalError(std::cerr, lclSuccess,
1598  lclErrStrm.str().c_str(),
1599  gblErrMsgHeader, *comm);
1600  }
1601  else {
1602  this->packAndPrepare (src, exportLIDs, this->exports_,
1603  this->numExportPacketsPerLID_,
1604  constantNumPackets);
1605  }
1606  }
1607 
1608  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1609  void
1610  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1611  doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1612  size_t constantNumPackets,
1613  CombineMode CM)
1614  {
1615  using Details::ProfilingRegion;
1616  using std::endl;
1617  const bool debug = Details::Behavior::debug("DistObject");
1618 
1619  ProfilingRegion region_uc
1620  ("Tpetra::DistObject::doTransferNew::unpackAndCombine");
1621 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1622  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1623  // favor of Kokkos profiling.
1624  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1625 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1626 
1627  if (debug) {
1628  std::ostringstream lclErrStrm;
1629  bool lclSuccess = false;
1630  try {
1631  this->unpackAndCombine (remoteLIDs, this->imports_,
1632  this->numImportPacketsPerLID_,
1633  constantNumPackets, CM);
1634  lclSuccess = true;
1635  }
1636  catch (std::exception& e) {
1637  lclErrStrm << "unpackAndCombine threw an exception: "
1638  << endl << e.what();
1639  }
1640  catch (...) {
1641  lclErrStrm << "unpackAndCombine threw an exception "
1642  "not a subclass of std::exception.";
1643  }
1644  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1645  "threw an exception in unpackAndCombine on "
1646  "one or more processes in the DistObject's communicator.";
1647  auto comm = getMap()->getComm();
1648  Details::checkGlobalError(std::cerr, lclSuccess,
1649  lclErrStrm.str().c_str(),
1650  gblErrMsgHeader, *comm);
1651  }
1652  else {
1653  this->unpackAndCombine (remoteLIDs, this->imports_,
1654  this->numImportPacketsPerLID_,
1655  constantNumPackets, CM);
1656  }
1657  }
1658 
1659  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1660  void
1663  (const SrcDistObject&,
1664  const size_t,
1665  const Kokkos::DualView<
1666  const local_ordinal_type*,
1668  const Kokkos::DualView<
1669  const local_ordinal_type*,
1671  const CombineMode CM)
1672  {}
1673 
1674  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1675  void
1678  (const SrcDistObject&,
1679  const Kokkos::DualView<
1680  const local_ordinal_type*,
1682  Kokkos::DualView<
1683  packet_type*,
1685  Kokkos::DualView<
1686  size_t*,
1688  size_t&)
1689  {}
1690 
1691  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1692  void
1695  (const Kokkos::DualView<
1696  const local_ordinal_type*,
1697  buffer_device_type>& /* importLIDs */,
1698  Kokkos::DualView<
1699  packet_type*,
1700  buffer_device_type> /* imports */,
1701  Kokkos::DualView<
1702  size_t*,
1703  buffer_device_type> /* numPacketsPerLID */,
1704  const size_t /* constantNumPackets */,
1705  const CombineMode /* combineMode */)
1706  {}
1707 
1708 
1709  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1710  void
1712  print (std::ostream& os) const
1713  {
1714  using Teuchos::FancyOStream;
1715  using Teuchos::getFancyOStream;
1716  using Teuchos::RCP;
1717  using Teuchos::rcpFromRef;
1718  using std::endl;
1719 
1720  RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
1721  this->describe (*out, Teuchos::VERB_DEFAULT);
1722  }
1723 
1724  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1725  std::unique_ptr<std::string>
1727  createPrefix(const char className[],
1728  const char methodName[]) const
1729  {
1730  auto map = this->getMap();
1731  auto comm = map.is_null() ? Teuchos::null : map->getComm();
1732  return Details::createPrefix(
1733  comm.getRawPtr(), className, methodName);
1734  }
1735 
1736  template<class DistObjectType>
1737  void
1739  Teuchos::RCP<DistObjectType>& input,
1740  const Teuchos::RCP<const Map<
1741  typename DistObjectType::local_ordinal_type,
1742  typename DistObjectType::global_ordinal_type,
1743  typename DistObjectType::node_type>>& newMap)
1744  {
1745  input->removeEmptyProcessesInPlace (newMap);
1746  if (newMap.is_null ()) { // my process is excluded
1747  input = Teuchos::null;
1748  }
1749  }
1750 
1751  template<class DistObjectType>
1752  void
1753  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
1754  {
1755  auto newMap = input->getMap ()->removeEmptyProcesses ();
1756  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
1757  }
1758 
// Explicit instantiation of DistObject for a general packet type.
// PACKET is the first template argument of DistObject; LO, GO and NODE
// are the remaining three, in order.
#define TPETRA_DISTOBJECT_INSTANT(PACKET, LO, GO, NODE) \
  template class DistObject< PACKET , LO , GO , NODE >;

// Explicit instantiation of DistObject<char, ...>.  A separate macro
// exists because the "SLGN" stuff above doesn't work for Packet=char.
#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
  template class DistObject< char , LO , GO , NODE >;
1767 
1768 } // namespace Tpetra
1769 
1770 #endif // TPETRA_DISTOBJECT_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
Base class for distributed Tpetra objects that support data redistribution.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
LocalOrdinal local_ordinal_type
The type of local indices.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
typename ::Kokkos::Details::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual std::string description() const
One-line description of this object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
bool isDistributed() const
Whether this is a globally distributed object.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Abstract base class for objects that can be the source of an Import or Export operation.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
CombineMode
Rule for combining data in an Import or Export.
@ ZERO
Replace old values with zero.