Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_FixedHashTable_def.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
45 #define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
46 
47 #include "Tpetra_Details_FixedHashTable_decl.hpp"
49 #ifdef TPETRA_USE_MURMUR_HASH
50 # include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
51 #endif // TPETRA_USE_MURMUR_HASH
52 #include "Kokkos_ArithTraits.hpp"
53 #include "Teuchos_TypeNameTraits.hpp"
55 #include <type_traits>
56 
57 namespace Tpetra {
58 namespace Details {
59 //
60 // This namespace stores utility functions and Kokkos
61 // functors for use in FixedHashTable construction.
62 //
63 namespace FHT {
64 
65 
66 // Is it worth actually using building the FixedHashTable using
67 // parallel threads, instead of just counting in a sequential loop?
68 //
69 // The parallel version of FixedHashTable construction isn't just a
70 // parallelization of the sequential loops. It incurs additional
71 // overheads. For example, the CountBuckets kernel uses atomic update
72 // instructions to count the number of "buckets" per offsets array
73 // (ptr) entry. Atomic updates have overhead, even if only one thread
74 // issues them. The Kokkos kernels are still correct in that case,
75 // but I would rather not incur overhead then. It might make sense to
76 // set the minimum number of threads to something greater than 1, but
77 // we would need experiments to find out.
78 template<class ExecSpace>
79 bool worthBuildingFixedHashTableInParallel () {
80  return ExecSpace::concurrency() > 1;
81 }
82 
83 //
84 // Functors for FixedHashTable initialization
85 //
86 // NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
87 // should consider replacing all of these functors with in-line
88 // lambdas. The only issue is that we would need to bake the
89 // execution space into the policy, since the default execution space
90 // might differ from the one Tpetra wants to use.
91 
101 template<class CountsViewType,
102  class KeysViewType,
103  class SizeType = typename KeysViewType::size_type>
105 public:
106  typedef CountsViewType counts_view_type;
107  typedef KeysViewType keys_view_type;
108  typedef typename CountsViewType::execution_space execution_space;
109  typedef typename CountsViewType::memory_space memory_space;
110  typedef SizeType size_type;
111  typedef typename keys_view_type::non_const_value_type key_type;
112  // mfh 21 May 2015: Having a device_type typedef in the functor
113  // along with an execution_space typedef causes compilation issues.
114  // This is because one of Kokkos' partial specializations picks up
115  // on the device_type typedef, and another picks up on the
116  // execution_space typedef. The former is a legacy of a previous
117  // design iteration of Kokkos, which did not separate memory and
118  // execution spaces.
120 
126  CountBuckets (const counts_view_type& counts,
127  const keys_view_type& keys,
128  const size_type size) :
129  counts_ (counts),
130  keys_ (keys),
131  size_ (size)
132  {}
133 
138  KOKKOS_INLINE_FUNCTION void
139  operator () (const size_type& i) const
140  {
141  typedef typename hash_type::result_type hash_value_type;
142 
143  const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
144  Kokkos::atomic_increment (&counts_[hashVal]);
145  }
146 
147  using value_type = Kokkos::pair<int, key_type>;
148 
152  KOKKOS_INLINE_FUNCTION void
153  operator () (const size_type& i, value_type& dst) const
154  {
155  using hash_value_type = typename hash_type::result_type;
156 
157  const key_type keyVal = keys_[i];
158  const hash_value_type hashVal = hash_type::hashFunc (keyVal, size_);
159  if (hashVal < hash_value_type (0) ||
160  hashVal >= hash_value_type (counts_.extent (0))) {
161  dst.first = 1;
162  dst.second = keyVal;
163  }
164  else {
165  Kokkos::atomic_increment (&counts_[hashVal]);
166  }
167  }
168 
170  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
171  {
172  dst.first = 0;
173  dst.second = key_type (0);
174  }
175 
176  KOKKOS_INLINE_FUNCTION void
177  join (volatile value_type& dst,
178  const volatile value_type& src) const
179  {
180  const bool good = dst.first == 0 || src.first == 0;
181  dst.first = good ? 0 : dst.first;
182  // leave dst.second as it is, to get the "first" key
183  }
184 
185 private:
187  counts_view_type counts_;
189  keys_view_type keys_;
191  size_type size_;
192 };
193 
204 template<class KeyType>
206  KOKKOS_INLINE_FUNCTION
207  FillPairsResult () :
208  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
209  // min() for a floating-point type returns the minimum _positive_
210  // normalized value. This is different than for integer types.
211  // lowest() is new in C++11 and returns the least value, always
212  // negative for signed finite types.
213  //
214  // mfh 23 May 2015: I have heard reports that
215  // std::numeric_limits<int>::lowest() does not exist with the
216  // Intel compiler. I'm not sure if the users in question actually
217  // enabled C++11. However, it's easy enough to work around this
218  // issue. The standard floating-point types are signed and have a
219  // sign bit, so lowest() is just -max(). For integer types, we
220  // can use min() instead.
221  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
222  ::Kokkos::Details::ArithTraits<KeyType>::min () :
223  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
224  success_ (true)
225  {
226  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
227  "KeyType must be some kind of number type.");
228  }
229 
230  KOKKOS_INLINE_FUNCTION
231  FillPairsResult (const KeyType& initMinKey,
232  const KeyType& initMaxKey) :
233  minKey_ (initMinKey),
234  maxKey_ (initMaxKey),
235  success_ (true)
236  {
237  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
238  "KeyType must be some kind of number type.");
239  }
240 
241  KeyType minKey_;
242  KeyType maxKey_;
243  bool success_;
244 };
245 
275 template<class PairsViewType,
276  class KeysViewType,
277  class CountsViewType,
278  class SizeType = typename KeysViewType::size_type>
279 class FillPairs {
280 public:
281  typedef typename CountsViewType::non_const_type counts_view_type;
282  typedef typename counts_view_type::const_type offsets_view_type;
283 
284  typedef PairsViewType pairs_view_type;
285  typedef typename KeysViewType::const_type keys_view_type;
286  typedef typename offsets_view_type::execution_space execution_space;
287  typedef typename offsets_view_type::memory_space memory_space;
288  typedef SizeType size_type;
289 
290  typedef typename keys_view_type::non_const_value_type key_type;
291  typedef typename pairs_view_type::non_const_value_type pair_type;
292 
294 
295  // mfh 23 May 2015: Having a device_type typedef in the functor
296  // along with an execution_space typedef causes compilation issues.
297  // This is because one of Kokkos' partial specializations picks up
298  // on the device_type typedef, and another picks up on the
299  // execution_space typedef. The former is a legacy of a previous
300  // design iteration of Kokkos, which did not separate memory and
301  // execution spaces.
303 
314  FillPairs (const pairs_view_type& pairs,
315  const counts_view_type& counts,
316  const offsets_view_type& ptr,
317  const keys_view_type& keys,
318  const typename pair_type::second_type startingValue) :
319  pairs_ (pairs),
320  counts_ (counts),
321  ptr_ (ptr),
322  keys_ (keys),
323  size_ (counts.extent (0)),
324  startingValue_ (startingValue),
325  initMinKey_ (::Kokkos::Details::ArithTraits<key_type>::max ()),
326  initMaxKey_ (::Kokkos::Details::ArithTraits<key_type>::is_integer ?
327  ::Kokkos::Details::ArithTraits<key_type>::min () :
328  -::Kokkos::Details::ArithTraits<key_type>::max ())
329  {}
330 
349  FillPairs (const pairs_view_type& pairs,
350  const counts_view_type& counts,
351  const offsets_view_type& ptr,
352  const keys_view_type& keys,
353  const typename pair_type::second_type startingValue,
354  const key_type initMinKey,
355  const key_type initMaxKey) :
356  pairs_ (pairs),
357  counts_ (counts),
358  ptr_ (ptr),
359  keys_ (keys),
360  size_ (counts.extent (0)),
361  startingValue_ (startingValue),
362  initMinKey_ (initMinKey),
363  initMaxKey_ (initMaxKey)
364  {}
365 
367  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
368  {
369  dst.minKey_ = initMinKey_;
370  dst.maxKey_ = initMaxKey_;
371  dst.success_ = true;
372  }
373 
374  KOKKOS_INLINE_FUNCTION void
375  join (volatile value_type& dst,
376  const volatile value_type& src) const
377  {
378  if (src.maxKey_ > dst.maxKey_) {
379  dst.maxKey_ = src.maxKey_;
380  }
381  if (src.minKey_ < dst.minKey_) {
382  dst.minKey_ = src.minKey_;
383  }
384  dst.success_ = dst.success_ && src.success_;
385  }
386 
391  KOKKOS_INLINE_FUNCTION void
392  operator () (const size_type& i, value_type& dst) const
393  {
394  typedef typename hash_type::result_type hash_value_type;
395  typedef typename offsets_view_type::non_const_value_type offset_type;
396  typedef typename pair_type::second_type val_type;
397  typedef typename std::remove_reference< decltype( counts_[0] ) >::type atomic_incr_type;
398 
399  const key_type key = keys_[i];
400  if (key > dst.maxKey_) {
401  dst.maxKey_ = key;
402  }
403  if (key < dst.minKey_) {
404  dst.minKey_ = key;
405  }
406  const val_type theVal = startingValue_ + static_cast<val_type> (i);
407  const hash_value_type hashVal = hash_type::hashFunc (key, size_);
408 
409  // Return the old count; decrement afterwards.
410  const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], atomic_incr_type(-1));
411  if (count == 0) {
412  dst.success_ = false; // FAILURE!
413  }
414  else {
415  const offset_type curPos = ptr_[hashVal+1] - count;
416 
417  pairs_[curPos].first = key;
418  pairs_[curPos].second = theVal;
419  }
420  }
421 
422 private:
423  pairs_view_type pairs_;
424  counts_view_type counts_;
425  offsets_view_type ptr_;
426  keys_view_type keys_;
427  size_type size_;
428  typename pair_type::second_type startingValue_;
430  key_type initMinKey_;
432  key_type initMaxKey_;
433 };
434 
458 template<class OffsetsViewType,
459  class PairsViewType,
460  class SizeType = typename OffsetsViewType::size_type>
462 public:
463  typedef typename OffsetsViewType::const_type offsets_view_type;
464  typedef typename PairsViewType::const_type pairs_view_type;
465  typedef typename offsets_view_type::execution_space execution_space;
466  typedef typename offsets_view_type::memory_space memory_space;
467  typedef SizeType size_type;
468 
469  // The result of the check is whether the table has one or more duplicates.
470  typedef int value_type;
471 
476  CheckForDuplicateKeys (const pairs_view_type& pairs,
477  const offsets_view_type& ptr) :
478  pairs_ (pairs),
479  ptr_ (ptr),
480  size_ (ptr_.extent (0) == 0 ?
481  size_type (0) :
482  ptr_.extent (0) - 1)
483  {}
484 
486  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
487  {
488  dst = 0;
489  }
490 
492  KOKKOS_INLINE_FUNCTION void
493  join (volatile value_type& dst,
494  const volatile value_type& src) const
495  {
496  dst = dst + src > 0?1:0;
497  }
498 
500  KOKKOS_INLINE_FUNCTION void
501  operator () (const size_type& i, value_type& dst) const
502  {
503  typedef typename offsets_view_type::non_const_value_type offset_type;
504  typedef typename pairs_view_type::non_const_value_type pair_type;
505  typedef typename pair_type::first_type key_type;
506 
507  if (dst>0) {
508  return; // we've already found duplicate keys elsewhere
509  }
510  else {
511  const offset_type beg = ptr_[i];
512  const offset_type end = ptr_[i+1];
513  bool foundDuplicateKey = false;
514  // This is an ~ n^2 algorithm in the worst case, where n is the
515  // max number of keys that hash to the same bucket. However, if
516  // the hash function is reasonable, n should be much less than
517  // the total number of keys.
518  for (offset_type j = beg + 1; j < end; ++j) {
519  const key_type curKey = pairs_[j].first;
520  for (offset_type k = beg; k < j; ++k) {
521  if (pairs_[k].first == curKey) {
522  foundDuplicateKey = true;
523  break;
524  }
525  }
526  }
527  dst = (dst>0) || foundDuplicateKey?1:0;
528  }
529  }
530 
531 private:
532  pairs_view_type pairs_;
533  offsets_view_type ptr_;
534  size_type size_;
535 };
536 
537 } // namespace FHT
538 
539 //
540 // Here begins the actual implementation of FixedHashTable.
541 //
542 
543 template<class KeyType, class ValueType, class DeviceType>
545 FixedHashTable () :
546  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
547  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
548  ::Kokkos::Details::ArithTraits<KeyType>::min () :
549  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
550  minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
551  maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
552  ::Kokkos::Details::ArithTraits<ValueType>::min () :
553  -::Kokkos::Details::ArithTraits<ValueType>::max ()),
554  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
555  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
556  ::Kokkos::Details::ArithTraits<KeyType>::min () :
557  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
558  contiguousValues_ (true), // trivially
559  checkedForDuplicateKeys_ (true), // it's an empty table; no need to check
560  hasDuplicateKeys_ (false)
561 {
562 }
563 
564 template<class KeyType, class ValueType, class DeviceType>
566 FixedHashTable (const keys_type& keys) :
567  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
568  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
569  ::Kokkos::Details::ArithTraits<KeyType>::min () :
570  -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
571  minVal_ (0),
572  maxVal_ (keys.size () == 0 ?
573  static_cast<ValueType> (0) :
574  static_cast<ValueType> (keys.size () - 1)),
575  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
576  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
577  ::Kokkos::Details::ArithTraits<KeyType>::min () :
578  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
579  contiguousValues_ (true),
580  checkedForDuplicateKeys_ (false),
581  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
582 {
583  const ValueType startingValue = static_cast<ValueType> (0);
584  const KeyType initMinKey = this->minKey_;
585  const KeyType initMaxKey = this->maxKey_;
586  this->init (keys, startingValue, initMinKey, initMaxKey,
587  initMinKey, initMinKey, false);
588 }
589 
590 template<class KeyType, class ValueType, class DeviceType>
592 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys) :
593  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
594  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
595  ::Kokkos::Details::ArithTraits<KeyType>::min () :
596  -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
597  minVal_ (0),
598  maxVal_ (keys.size () == 0 ?
599  static_cast<ValueType> (0) :
600  static_cast<ValueType> (keys.size () - 1)),
601  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
602  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
603  ::Kokkos::Details::ArithTraits<KeyType>::min () :
604  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
605  contiguousValues_ (true),
606  checkedForDuplicateKeys_ (false),
607  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
608 {
609  typedef typename keys_type::non_const_type nonconst_keys_type;
610 
611  // mfh 01 May 2015: I don't trust that
612  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
613  // so I ensure this manually.
614  const ValueType startingValue = static_cast<ValueType> (0);
615  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
616  keys.size ());
617  using Kokkos::ViewAllocateWithoutInitializing;
618  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
619  keys_k.extent (0));
620  Kokkos::deep_copy (keys_d, keys_k);
621  const KeyType initMinKey = this->minKey_;
622  const KeyType initMaxKey = this->maxKey_;
623  this->init (keys_d, startingValue, initMinKey, initMaxKey,
624  initMinKey, initMinKey, false);
625 }
626 
627 template<class KeyType, class ValueType, class DeviceType>
629 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
630  const ValueType startingValue) :
631  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
632  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
633  ::Kokkos::Details::ArithTraits<KeyType>::min () :
634  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
635  minVal_ (startingValue),
636  maxVal_ (keys.size () == 0 ?
637  startingValue :
638  static_cast<ValueType> (startingValue + keys.size () - 1)),
639  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
640  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
641  ::Kokkos::Details::ArithTraits<KeyType>::min () :
642  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
643  contiguousValues_ (true),
644  checkedForDuplicateKeys_ (false),
645  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
646 {
647  typedef typename keys_type::non_const_type nonconst_keys_type;
648 
649  // mfh 01 May 2015: I don't trust that
650  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
651  // so I ensure this manually.
652  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
653  keys.size ());
654  using Kokkos::ViewAllocateWithoutInitializing;
655  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
656  keys_k.extent (0));
657  Kokkos::deep_copy (keys_d, keys_k);
658 
659  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
660  // min() for a floating-point type returns the minimum _positive_
661  // normalized value. This is different than for integer types.
662  // lowest() is new in C++11 and returns the least value, always
663  // negative for signed finite types.
664  //
665  // mfh 23 May 2015: I have heard reports that
666  // std::numeric_limits<int>::lowest() does not exist with the Intel
667  // compiler. I'm not sure if the users in question actually enabled
668  // C++11. However, it's easy enough to work around this issue. The
669  // standard floating-point types are signed and have a sign bit, so
670  // lowest() is just -max(). For integer types, we can use min()
671  // instead.
672  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
673  ::Kokkos::Details::ArithTraits<KeyType>::min () :
674  -::Kokkos::Details::ArithTraits<KeyType>::max ();
675  this->init (keys_d, startingValue, initMinKey, initMaxKey,
676  initMinKey, initMinKey, false);
677 
678 }
679 
680 template<class KeyType, class ValueType, class DeviceType>
682 FixedHashTable (const keys_type& keys,
683  const KeyType firstContigKey,
684  const KeyType lastContigKey,
685  const ValueType startingValue) :
686  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
687  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
688  ::Kokkos::Details::ArithTraits<KeyType>::min () :
689  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
690  minVal_ (startingValue),
691  maxVal_ (keys.size () == 0 ?
692  startingValue :
693  static_cast<ValueType> (startingValue + keys.size () - 1)),
694  firstContigKey_ (firstContigKey),
695  lastContigKey_ (lastContigKey),
696  contiguousValues_ (true),
697  checkedForDuplicateKeys_ (false),
698  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
699 {
700  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
701  // min() for a floating-point type returns the minimum _positive_
702  // normalized value. This is different than for integer types.
703  // lowest() is new in C++11 and returns the least value, always
704  // negative for signed finite types.
705  //
706  // mfh 23 May 2015: I have heard reports that
707  // std::numeric_limits<int>::lowest() does not exist with the Intel
708  // compiler. I'm not sure if the users in question actually enabled
709  // C++11. However, it's easy enough to work around this issue. The
710  // standard floating-point types are signed and have a sign bit, so
711  // lowest() is just -max(). For integer types, we can use min()
712  // instead.
713  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
714  ::Kokkos::Details::ArithTraits<KeyType>::min () :
715  -::Kokkos::Details::ArithTraits<KeyType>::max ();
716  this->init (keys, startingValue, initMinKey, initMaxKey,
717  firstContigKey, lastContigKey, true);
718 }
719 
720 template<class KeyType, class ValueType, class DeviceType>
722 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
723  const KeyType firstContigKey,
724  const KeyType lastContigKey,
725  const ValueType startingValue) :
726  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
727  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
728  ::Kokkos::Details::ArithTraits<KeyType>::min () :
729  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
730  minVal_ (startingValue),
731  maxVal_ (keys.size () == 0 ?
732  startingValue :
733  static_cast<ValueType> (startingValue + keys.size () - 1)),
734  firstContigKey_ (firstContigKey),
735  lastContigKey_ (lastContigKey),
736  contiguousValues_ (true),
737  checkedForDuplicateKeys_ (false),
738  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
739 {
740  typedef typename keys_type::non_const_type nonconst_keys_type;
741 
742  // mfh 01 May 2015: I don't trust that
743  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
744  // so I ensure this manually.
745  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
746  keys.size ());
747  using Kokkos::ViewAllocateWithoutInitializing;
748  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
749  keys_k.extent (0));
750  Kokkos::deep_copy (keys_d, keys_k);
751 
752  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
753  // min() for a floating-point type returns the minimum _positive_
754  // normalized value. This is different than for integer types.
755  // lowest() is new in C++11 and returns the least value, always
756  // negative for signed finite types.
757  //
758  // mfh 23 May 2015: I have heard reports that
759  // std::numeric_limits<int>::lowest() does not exist with the Intel
760  // compiler. I'm not sure if the users in question actually enabled
761  // C++11. However, it's easy enough to work around this issue. The
762  // standard floating-point types are signed and have a sign bit, so
763  // lowest() is just -max(). For integer types, we can use min()
764  // instead.
765  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
766  ::Kokkos::Details::ArithTraits<KeyType>::min () :
767  -::Kokkos::Details::ArithTraits<KeyType>::max ();
768  this->init (keys_d, startingValue, initMinKey, initMaxKey,
769  firstContigKey, lastContigKey, true);
770 }
771 
772 template<class KeyType, class ValueType, class DeviceType>
774 FixedHashTable (const keys_type& keys,
775  const ValueType startingValue) :
776  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
777  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
778  ::Kokkos::Details::ArithTraits<KeyType>::min () :
779  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
780  minVal_ (startingValue),
781  maxVal_ (keys.size () == 0 ?
782  startingValue :
783  static_cast<ValueType> (startingValue + keys.size () - 1)),
784  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
785  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
786  ::Kokkos::Details::ArithTraits<KeyType>::min () :
787  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
788  contiguousValues_ (true),
789  checkedForDuplicateKeys_ (false),
790  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
791 {
792  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
793  // min() for a floating-point type returns the minimum _positive_
794  // normalized value. This is different than for integer types.
795  // lowest() is new in C++11 and returns the least value, always
796  // negative for signed finite types.
797  //
798  // mfh 23 May 2015: I have heard reports that
799  // std::numeric_limits<int>::lowest() does not exist with the Intel
800  // compiler. I'm not sure if the users in question actually enabled
801  // C++11. However, it's easy enough to work around this issue. The
802  // standard floating-point types are signed and have a sign bit, so
803  // lowest() is just -max(). For integer types, we can use min()
804  // instead.
805  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
806  ::Kokkos::Details::ArithTraits<KeyType>::min () :
807  -::Kokkos::Details::ArithTraits<KeyType>::max ();
808  this->init (keys, startingValue, initMinKey, initMaxKey,
809  initMinKey, initMinKey, false);
810 }
811 
812 template<class KeyType, class ValueType, class DeviceType>
814 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
815  const Teuchos::ArrayView<const ValueType>& vals) :
816  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
817  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
818  ::Kokkos::Details::ArithTraits<KeyType>::min () :
819  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
820  minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
821  maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
822  ::Kokkos::Details::ArithTraits<ValueType>::min () :
823  -::Kokkos::Details::ArithTraits<ValueType>::max ()),
824  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
825  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
826  ::Kokkos::Details::ArithTraits<KeyType>::min () :
827  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
828  contiguousValues_ (false),
829  checkedForDuplicateKeys_ (false),
830  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
831 {
832  // mfh 01 May 2015: I don't trust that
833  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
834  // so I ensure this manually.
835  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
836  keys.size ());
837  host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
838  vals.size ());
839  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
840  // min() for a floating-point type returns the minimum _positive_
841  // normalized value. This is different than for integer types.
842  // lowest() is new in C++11 and returns the least value, always
843  // negative for signed finite types.
844  //
845  // mfh 23 May 2015: I have heard reports that
846  // std::numeric_limits<int>::lowest() does not exist with the Intel
847  // compiler. I'm not sure if the users in question actually enabled
848  // C++11. However, it's easy enough to work around this issue. The
849  // standard floating-point types are signed and have a sign bit, so
850  // lowest() is just -max(). For integer types, we can use min()
851  // instead.
852  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
853  ::Kokkos::Details::ArithTraits<KeyType>::min () :
854  -::Kokkos::Details::ArithTraits<KeyType>::max ();
855  this->init (keys_k, vals_k, initMinKey, initMaxKey);
856 }
857 
858 template<class KeyType, class ValueType, class DeviceType>
859 void
861 init (const keys_type& keys,
862  ValueType startingValue,
863  KeyType initMinKey,
864  KeyType initMaxKey,
865  KeyType firstContigKey,
866  KeyType lastContigKey,
867  const bool computeInitContigKeys)
868 {
869  using Kokkos::subview;
870  using Kokkos::ViewAllocateWithoutInitializing;
871  using Teuchos::TypeNameTraits;
872  typedef typename std::decay<decltype (keys.extent (0)) >::type size_type;
873  const char prefix[] = "Tpetra::Details::FixedHashTable: ";
874 
875  const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
876  {
877  const offset_type theMaxVal = ::Kokkos::Details::ArithTraits<offset_type>::max ();
878  const size_type maxValST = static_cast<size_type> (theMaxVal);
879  TEUCHOS_TEST_FOR_EXCEPTION
880  (keys.extent (0) > maxValST, std::invalid_argument, prefix << "The "
881  "number of keys " << keys.extent (0) << " does not fit in "
882  "offset_type = " << TypeNameTraits<offset_type>::name () << ", whose "
883  "max value is " << theMaxVal << ". This means that it is not possible to "
884  "use this constructor.");
885  }
886  TEUCHOS_TEST_FOR_EXCEPTION
887  (static_cast<unsigned long long> (numKeys) >
888  static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
889  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
890  "keys " << numKeys << " is greater than the maximum representable "
891  "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ". "
892  "This means that it is not possible to use this constructor.");
893  TEUCHOS_TEST_FOR_EXCEPTION
894  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, prefix <<
895  "This class currently only works when the number of keys is <= INT_MAX = "
896  << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
897  "developers.");
898 
899  const bool buildInParallel =
900  FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
901  const bool debug = ::Tpetra::Details::Behavior::debug ();
902 
903  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
904  // could change that by setting up ptr and val as Kokkos::DualView
905  // instances. If we do that, since we are filling on host for now,
906  // we want to make sure that we only zero-fill ptr on host
907  // initially, and that we don't fill val at all. Once we finish
908  // Kokkos-izing all the set-up kernels, we won't need DualView for
909  // either ptr or val.
910 
911  if (computeInitContigKeys) {
912  // Find the first and last initial contiguous keys. If we find a
913  // long sequence of initial contiguous keys, we can save space by
914  // not storing them explicitly as pairs in the hash table.
915  //
916  // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
917  // ("min index such that the difference between the current key and
918  // the next != 1"), which takes multiple passes over the data. We
919  // could fuse it with CountBuckets (only update counts on 'final'
920  // pass). However, we're really just moving this sequential search
921  // out of Map's constructor here, so there is no loss in doing it
922  // sequentially for now. Later, we can work on parallelization.
923  if (numKeys > 0) {
924  // FIXME: make it a parallel kernel with no host copy
925  auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
926  keys);
927  firstContigKey_ = keys_h[0];
928  // Start with one plus, then decrement at the end. That lets us do
929  // only one addition per loop iteration, rather than two (if we test
930  // against lastContigKey + 1 and then increment lastContigKey).
931  lastContigKey_ = firstContigKey_ + 1;
932 
933  // We will only store keys in the table that are not part of the
934  // initial contiguous sequence. It's possible for the initial
935  // contiguous sequence to be trivial, which for a nonzero number of
936  // keys means that the "sequence" has length 1.
937  for (offset_type k = 1; k < numKeys; ++k) {
938  if (lastContigKey_ != keys_h[k]) {
939  break;
940  }
941  ++lastContigKey_;
942  }
943  --lastContigKey_;
944  }
945  }
946  else {
947  firstContigKey_ = firstContigKey;
948  lastContigKey_ = lastContigKey;
949  }
950 
951  offset_type startIndex;
952  if (numKeys > 0) {
953  initMinKey = std::min (initMinKey, firstContigKey_);
954  initMaxKey = std::max (initMaxKey, lastContigKey_);
955  startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
956  } else {
957  startIndex = 0;
958  }
959 
960  const offset_type theNumKeys = numKeys - startIndex;
961  const offset_type size = hash_type::getRecommendedSize (theNumKeys);
962 #ifdef HAVE_TPETRA_DEBUG
963  TEUCHOS_TEST_FOR_EXCEPTION(
964  size == 0 && numKeys != 0, std::logic_error,
965  "Tpetra::Details::FixedHashTable constructor: "
966  "getRecommendedSize(" << numKeys << ") returned zero, "
967  "even though the number of keys " << numKeys << " is nonzero. "
968  "Please report this bug to the Tpetra developers.");
969 #endif // HAVE_TPETRA_DEBUG
970  keys_type theKeys =
971  subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
972 
973  // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
974  // fence here, but we do before other allocations.
975 
976  // The array of counts must be separate from the array of offsets,
977  // in order for parallel_scan to work correctly.
978  typedef typename ptr_type::non_const_type counts_type;
979  counts_type counts ("Tpetra::FixedHashTable::counts", size);
980 
981  //
982  // Count the number of "buckets" per offsets array (ptr) entry.
983  //
984 
985  // Will only create the mirror for buildInParallel false - but then use it in two places
986  typename keys_type::HostMirror theKeysHost;
987 
988  // The Kokkos kernel uses atomic update instructions to count the
989  // number of "buckets" per offsets array (ptr) entry. Atomic
990  // updates incur overhead, even in the sequential case. The Kokkos
991  // kernel is still correct in that case, but I would rather not
992  // incur overhead then.
993  if (buildInParallel) {
994  FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
995  using execution_space = typename counts_type::execution_space;
996  using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
997  const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
998  if (debug) {
999  using key_type = typename keys_type::non_const_value_type;
1000  Kokkos::pair<int, key_type> err;
1001  Kokkos::parallel_reduce (kernelLabel, range_type (0, theNumKeys),
1002  functor, err);
1003  TEUCHOS_TEST_FOR_EXCEPTION
1004  (err.first != 0, std::logic_error, "Tpetra::Details::FixedHashTable "
1005  "constructor: CountBuckets found a key " << err.second << " that "
1006  "results in an out-of-bounds hash value.");
1007  }
1008  else {
1009  Kokkos::parallel_for (kernelLabel, range_type (0, theNumKeys), functor);
1010  }
1011  }
1012  else {
1013  Kokkos::HostSpace hostMemSpace;
1014  theKeysHost = Kokkos::create_mirror_view(theKeys);
1015  Kokkos::deep_copy(theKeysHost, theKeys);
1016  auto countsHost = Kokkos::create_mirror_view (hostMemSpace, counts);
1017 
1018  for (offset_type k = 0; k < theNumKeys; ++k) {
1019  using key_type = typename keys_type::non_const_value_type;
1020  const key_type key = theKeysHost[k];
1021 
1022  using hash_value_type = typename hash_type::result_type;
1023  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1024  TEUCHOS_TEST_FOR_EXCEPTION
1025  (hashVal < hash_value_type (0) ||
1026  hashVal >= hash_value_type (countsHost.extent (0)),
1027  std::logic_error, "Tpetra::Details::FixedHashTable "
1028  "constructor: Sequential CountBuckets found a key " << key
1029  << " that results in an out-of-bounds hash value.");
1030 
1031  ++countsHost[hashVal];
1032  }
1033  Kokkos::deep_copy (counts, countsHost);
1034  }
1035 
1036  // KJ: This fence is not required for the 2-argument deep_copy which calls
1037  // fence, but will be required if switched to the 3-argumemt deep_copy which
1038  // passes a space. The 3-argument form does not fence.
1039  execution_space().fence ();
1040 
1041  // Kokkos::View fills with zeros by default.
1042  typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size+1);
1043 
1044  // Compute row offsets via prefix sum:
1045  //
1046  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1047  //
1048  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1049  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1050  // formula is ptr[i+1] += ptr[i].
1051  //
1052  // parallel_scan does not incur overhead with Kokkos::Serial, but
1053  // with actual parallel execution spaces, it does require multiple
1054  // passes over the data. Thus, it still makes sense to have a
1055  // sequential fall-back.
1056 
1058  if (buildInParallel) {
1059  computeOffsetsFromCounts (ptr, counts);
1060  }
1061 
1062  if (! buildInParallel || debug) {
1063  Kokkos::HostSpace hostMemSpace;
1064  auto counts_h = Kokkos::create_mirror_view_and_copy (hostMemSpace, counts);
1065  auto ptr_h = Kokkos::create_mirror_view (hostMemSpace, ptr);
1066 
1067 #ifdef KOKKOS_ENABLE_SERIAL
1068  Kokkos::Serial hostExecSpace;
1069 #else
1070  Kokkos::DefaultHostExecutionSpace hostExecSpace;
1071 #endif // KOKKOS_ENABLE_SERIAL
1072 
1073  computeOffsetsFromCounts (hostExecSpace, ptr_h, counts_h);
1074  Kokkos::deep_copy (ptr, ptr_h);
1075 
1076  if (debug) {
1077  bool bad = false;
1078  for (offset_type i = 0; i < size; ++i) {
1079  if (ptr_h[i+1] != ptr_h[i] + counts_h[i]) {
1080  bad = true;
1081  }
1082  }
1083  TEUCHOS_TEST_FOR_EXCEPTION
1084  (bad, std::logic_error, "Tpetra::Details::FixedHashTable "
1085  "constructor: computeOffsetsFromCounts gave an incorrect "
1086  "result.");
1087  }
1088  }
1089 
1090  // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
1091  // This fence is necessary as we need to make sure that the offset view
1092  // completes before the view is used in the next functor.
1093  execution_space().fence ();
1094 
1095  // Allocate the array of (key,value) pairs. Don't fill it with
1096  // zeros, because we will fill it with actual data below.
1097  typedef typename val_type::non_const_type nonconst_val_type;
1098  nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
1099  theNumKeys);
1100 
1101  // Fill in the hash table's "values" (the (key,value) pairs).
1102  typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1103  typename ptr_type::non_const_type> functor_type;
1104  typename functor_type::value_type result (initMinKey, initMaxKey);
1105 
1106  const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
1107  if (buildInParallel) {
1108  functor_type functor (val, counts, ptr, theKeys, newStartingValue,
1109  initMinKey, initMaxKey);
1110  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1111  Kokkos::parallel_reduce (range_type (0, theNumKeys), functor, result);
1112  }
1113  else {
1114  Kokkos::HostSpace hostMemSpace;
1115  auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
1116  auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1117  auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1118  for (offset_type k = 0; k < theNumKeys; ++k) {
1119  typedef typename hash_type::result_type hash_value_type;
1120  const KeyType key = theKeysHost[k];
1121  if (key > result.maxKey_) {
1122  result.maxKey_ = key;
1123  }
1124  if (key < result.minKey_) {
1125  result.minKey_ = key;
1126  }
1127  const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1128  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1129 
1130  // Return the old count; decrement afterwards.
1131  const offset_type count = counts_h[hashVal];
1132  --counts_h[hashVal];
1133  if (count == 0) {
1134  result.success_ = false; // FAILURE!
1135  break;
1136  }
1137  else {
1138  const offset_type curPos = ptr_h[hashVal+1] - count;
1139  val_h[curPos].first = key;
1140  val_h[curPos].second = theVal;
1141  }
1142  }
1143  Kokkos::deep_copy(counts, counts_h); // restore
1144  Kokkos::deep_copy(val, val_h); // restore
1145  }
1146 
1147  // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1148  // reports of exceptions being thrown in Albany.
1149  //
1150  // TEUCHOS_TEST_FOR_EXCEPTION
1151  // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1152  // "init: Filling the hash table failed! Please report this bug to the "
1153  // "Tpetra developers.");
1154  (void) result; // prevent build warning (set but unused variable)
1155 
1156  // "Commit" the computed arrays and other computed quantities.
1157  ptr_ = ptr;
1158  val_ = val;
1159  minKey_ = result.minKey_;
1160  maxKey_ = result.maxKey_;
1161  // We've already set firstContigKey_ and lastContigKey_ above.
1162 }
1163 
1164 
1165 template<class KeyType, class ValueType, class DeviceType>
1166 void
1167 FixedHashTable<KeyType, ValueType, DeviceType>::
1168 init (const host_input_keys_type& keys,
1169  const host_input_vals_type& vals,
1170  KeyType initMinKey,
1171  KeyType initMaxKey)
1172 {
1173  const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
1174  TEUCHOS_TEST_FOR_EXCEPTION
1175  (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
1176  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1177  "keys " << numKeys << " is greater than the maximum representable "
1178  "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ".");
1179  TEUCHOS_TEST_FOR_EXCEPTION
1180  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1181  "Details::FixedHashTable: This class currently only works when the number "
1182  "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1183  ", please talk to the Tpetra developers.");
1184 
1185  // There's no need to find the first and last initial contiguous
1186  // keys in this case, because if we reach this init() function, then
1187  // hasContiguousValues() is false and so get() doesn't use the
1188  // initial contiguous sequence of keys.
1189 
1190  const offset_type size = hash_type::getRecommendedSize (numKeys);
1191 #ifdef HAVE_TPETRA_DEBUG
1192  TEUCHOS_TEST_FOR_EXCEPTION(
1193  size == 0 && numKeys != 0, std::logic_error,
1194  "Tpetra::Details::FixedHashTable constructor: "
1195  "getRecommendedSize(" << numKeys << ") returned zero, "
1196  "even though the number of keys " << numKeys << " is nonzero. "
1197  "Please report this bug to the Tpetra developers.");
1198 #endif // HAVE_TPETRA_DEBUG
1199 
1200  // FIXME: Investigate a couple options:
1201  // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1202  // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1203  // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1204  // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1205  // been "Kokkos-ized".
1206  Kokkos::HostSpace hostMemSpace;
1207  typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size + 1);
1208  auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1209 
1210  // Allocate the array of key,value pairs. Don't waste time filling
1211  // it with zeros, because we will fill it with actual data below.
1212  using Kokkos::ViewAllocateWithoutInitializing;
1213  typedef typename val_type::non_const_type nonconst_val_type;
1214  nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
1215  numKeys);
1216  auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1217 
1218  // Compute number of entries in each hash table position.
1219  for (offset_type k = 0; k < numKeys; ++k) {
1220  const typename hash_type::result_type hashVal =
1221  hash_type::hashFunc (keys[k], size);
1222  // Shift over one, so that counts[j] = ptr[j+1]. See below.
1223  ++ptr_h[hashVal+1];
1224  }
1225 
1226  // Compute row offsets via prefix sum:
1227  //
1228  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1229  //
1230  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1231  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1232  // formula is ptr[i+1] += ptr[i].
1233  for (offset_type i = 0; i < size; ++i) {
1234  ptr_h[i+1] += ptr_h[i];
1235  }
1236  //ptr[0] = 0; // We've already done this when initializing ptr above.
1237 
1238  // curRowStart[i] is the offset of the next element in row i.
1239  typename ptr_type::non_const_type::HostMirror curRowStart ("Tpetra::FixedHashTable::curRowStart", size);
1240 
1241  // Fill in the hash table.
1242  FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1243  for (offset_type k = 0; k < numKeys; ++k) {
1244  typedef typename hash_type::result_type hash_value_type;
1245  const KeyType key = keys[k];
1246  if (key > result.maxKey_) {
1247  result.maxKey_ = key;
1248  }
1249  if (key < result.minKey_) {
1250  result.minKey_ = key;
1251  }
1252  const ValueType theVal = vals[k];
1253  if (theVal > maxVal_) {
1254  maxVal_ = theVal;
1255  }
1256  if (theVal < minVal_) {
1257  minVal_ = theVal;
1258  }
1259  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1260 
1261  const offset_type offset = curRowStart[hashVal];
1262  const offset_type curPos = ptr_h[hashVal] + offset;
1263  if (curPos >= ptr_h[hashVal+1]) {
1264  result.success_ = false; // FAILURE!
1265  }
1266  else {
1267  val_h[curPos].first = key;
1268  val_h[curPos].second = theVal;
1269  ++curRowStart[hashVal];
1270  }
1271  }
1272 
1273  TEUCHOS_TEST_FOR_EXCEPTION
1274  (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1275  "init: Filling the hash table failed! Please report this bug to the "
1276  "Tpetra developers.");
1277 
1278  // "Commit" the computed arrays and other computed quantities.
1279  Kokkos::deep_copy(ptr, ptr_h);
1280  Kokkos::deep_copy(val, val_h);
1281 
1282  ptr_ = ptr;
1283  val_ = val;
1284  minKey_ = result.minKey_;
1285  maxKey_ = result.maxKey_;
1286  // We've already assigned to minVal_ and maxVal_ above.
1287 }
1288 
1289 template <class KeyType, class ValueType, class DeviceType>
1290 bool
1293 {
1294  if (! checkedForDuplicateKeys_) {
1295  hasDuplicateKeys_ = checkForDuplicateKeys ();
1296  checkedForDuplicateKeys_ = true;
1297  }
1298  return hasDuplicateKeys_;
1299 }
1300 
1301 template <class KeyType, class ValueType, class DeviceType>
1302 bool
1304 checkForDuplicateKeys () const
1305 {
1306  const offset_type size = this->getSize ();
1307  // It's allowed for the hash table to have a positive number of
1308  // buckets (getSize()), but still store no entries (numPairs()).
1309  // Both cases trivially entail no duplicates.
1310  if (size == 0 || this->numPairs () == 0) {
1311  return false;
1312  }
1313  else {
1314  typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1315  functor_type functor (val_, ptr_);
1316  int hasDupKeys = 0;
1317  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1318  Kokkos::parallel_reduce (range_type (0, size), functor, hasDupKeys);
1319  return hasDupKeys > 0;
1320  }
1321 }
1322 
1323 template <class KeyType, class ValueType, class DeviceType>
1324 std::string
1326 description () const
1327 {
1328  std::ostringstream oss;
1329  oss << "FixedHashTable<"
1330  << Teuchos::TypeNameTraits<KeyType>::name () << ","
1331  << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1332  << "{ numKeys: " << val_.extent (0)
1333  << ", tableSize: " << this->getSize () << " }";
1334  return oss.str();
1335 }
1336 
1337 template <class KeyType, class ValueType, class DeviceType>
1338 void
1340 describe (Teuchos::FancyOStream& out,
1341  const Teuchos::EVerbosityLevel verbLevel) const
1342 {
1343  using std::endl;
1344  using std::setw;
1345  using Teuchos::OSTab;
1346  using Teuchos::rcpFromRef;
1347  using Teuchos::TypeNameTraits;
1348  using Teuchos::VERB_DEFAULT;
1349  using Teuchos::VERB_NONE;
1350  using Teuchos::VERB_LOW;
1351  using Teuchos::VERB_EXTREME;
1352 
1353  // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1354  // access to ptr_ and val_ from the host.
1355 
1356  Teuchos::EVerbosityLevel vl = verbLevel;
1357  if (vl == VERB_DEFAULT) vl = VERB_LOW;
1358 
1359  if (vl == VERB_NONE) {
1360  // do nothing
1361  }
1362  else if (vl == VERB_LOW) {
1363  out << this->description() << endl;
1364  }
1365  else { // MEDIUM, HIGH or EXTREME
1366  out << "FixedHashTable:" << endl;
1367  {
1368  OSTab tab1 (rcpFromRef (out));
1369 
1370  // const std::string label = this->getObjectLabel ();
1371  // if (label != "") {
1372  // out << "label: " << label << endl;
1373  // }
1374  out << "Template parameters:" << endl;
1375  {
1376  OSTab tab2 (rcpFromRef (out));
1377  out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1378  << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1379  }
1380 
1381  const offset_type tableSize = this->getSize ();
1382  const offset_type numKeys = val_.extent (0);
1383 
1384  out << "Table parameters:" << endl;
1385  {
1386  OSTab tab2 (rcpFromRef (out));
1387  out << "numKeys: " << numKeys << endl
1388  << "tableSize: " << tableSize << endl;
1389  }
1390 
1391  if (vl >= VERB_EXTREME) {
1392  out << "Contents: ";
1393  if (tableSize == 0 || numKeys == 0) {
1394  out << "[]" << endl;
1395  } else {
1396  out << "[ " << endl;
1397  {
1398  OSTab tab2 (rcpFromRef (out));
1399  for (offset_type i = 0; i < tableSize; ++i) {
1400  OSTab tab3 (rcpFromRef (out));
1401  out << "[";
1402  for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1403  out << "(" << val_[k].first << "," << val_[k].second << ")";
1404  if (k + 1 < ptr_[i+1]) {
1405  out << ", ";
1406  }
1407  }
1408  out << "]" << endl;
1409  } // for each table position i
1410  }
1411  out << "]" << endl;
1412  } // The table contains entries
1413  } // vl >= VERB_EXTREME
1414  }
1415  out << endl;
1416  } // if vl > VERB_LOW
1417 }
1418 
1419 } // namespace Details
1420 } // namespace Tpetra
1421 
1422 // Macro that explicitly instantiates FixedHashTable for the given local
1423 // ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1424 // template parameters occur in the opposite order of most Tpetra
1425 // classes. This is because FixedHashTable performs global-to-local
1426 // lookup, and the convention in templated C++ lookup tables (such as
1427 // std::map) is <KeyType, ValueType>.
1428 //
1429 // This macro must be explanded within the Tpetra::Details namespace.
1430 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1431  template class FixedHashTable< GO , LO >;
1432 
1433 // Macro that explicitly instantiates FixedHashTable for the given
1434 // local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1435 // types. Note that FixedHashTable's first two template parameters
1436 // occur in the opposite order of most Tpetra classes. This is
1437 // because FixedHashTable performs global-to-local lookup, and the
1438 // convention in templated C++ lookup tables (such as std::map) is
1439 // <KeyType, ValueType>.
1440 //
1441 // This macro must be explanded within the Tpetra::Details namespace.
1442 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1443  template class FixedHashTable< GO , LO , DEVICE >;
1444 
1445 #endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
static bool debug()
Whether Tpetra is in debug mode.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
KOKKOS_INLINE_FUNCTION void join(volatile value_type &dst, const volatile value_type &src) const
Combine two intermediate reduction results.
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel for functor for counting "buckets" in the FixedHashTable.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
std::string description() const
Implementation of Teuchos::Describable interface.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
Implementation details of Tpetra.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Reduction result for FillPairs functor below.
bool success_
Whether fill succeeded (it can only fail on a bug)
The hash function for FixedHashTable.
ResultType result_type
Type of the return value of the hash function.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &key, const offset_type &size)
The hash function.