Kokkos Core Kernels Package  Version of the Day
Kokkos_ScatterView.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
50 
51 #ifndef KOKKOS_SCATTER_VIEW_HPP
52 #define KOKKOS_SCATTER_VIEW_HPP
53 
54 #include <Kokkos_Core.hpp>
55 #include <utility>
56 
57 namespace Kokkos {
58 namespace Experimental {
59 
/*
 * Tag types used as ScatterView / ScatterValue template arguments.
 *
 * Reduction operation tags
 *  - These correspond to a subset of the reducers in parallel_reduce.
 *  - See the ScatterValue specializations for what each one does.
 */
struct ScatterSum {};   // combine contributions with +=
struct ScatterProd {};  // combine contributions with *=
struct ScatterMax {};   // keep the larger value
struct ScatterMin {};   // keep the smaller value

/* Duplication tags: select which ScatterView specialization is used. */
struct ScatterNonDuplicated {};
struct ScatterDuplicated {};

/* Contribution tags: select plain or Kokkos::atomic_* updates in ScatterValue. */
struct ScatterNonAtomic {};
struct ScatterAtomic {};
75 
76 } // namespace Experimental
77 } // namespace Kokkos
78 
79 namespace Kokkos {
80 namespace Impl {
81 namespace Experimental {
82 
/* Trait: default duplication tag for an execution space.
   Only declared here; each enabled backend supplies a specialization that
   defines the nested `type`. */
template <typename ExecSpace>
struct DefaultDuplication;

/* Trait: default contribution tag for an (execution space, duplication tag)
   pair. Only declared here; specialized per enabled backend. */
template <typename ExecSpace, typename Duplication>
struct DefaultContribution;
88 
#ifdef KOKKOS_ENABLE_SERIAL
/* Kokkos::Serial defaults: non-duplicated view; contributions are non-atomic
   for either duplication tag. */
template <>
struct DefaultDuplication<Kokkos::Serial> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};

template <>
struct DefaultContribution<Kokkos::Serial,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};

template <>
struct DefaultContribution<Kokkos::Serial,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
#endif
106 
#ifdef KOKKOS_ENABLE_OPENMP
/* Kokkos::OpenMP defaults: duplicated view. Non-duplicated views contribute
   atomically, duplicated views contribute non-atomically. */
template <>
struct DefaultDuplication<Kokkos::OpenMP> {
  using type = Kokkos::Experimental::ScatterDuplicated;
};

template <>
struct DefaultContribution<Kokkos::OpenMP,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};

template <>
struct DefaultContribution<Kokkos::OpenMP,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
#endif
123 
#ifdef KOKKOS_ENABLE_OPENMPTARGET
/* Kokkos::Experimental::OpenMPTarget defaults: non-duplicated view.
   Non-duplicated views contribute atomically, duplicated views contribute
   non-atomically. */
template <>
struct DefaultDuplication<Kokkos::Experimental::OpenMPTarget> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};

template <>
struct DefaultContribution<Kokkos::Experimental::OpenMPTarget,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};

template <>
struct DefaultContribution<Kokkos::Experimental::OpenMPTarget,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
#endif
140 
#ifdef KOKKOS_ENABLE_HPX
/* Kokkos::Experimental::HPX defaults: duplicated view. Non-duplicated views
   contribute atomically, duplicated views contribute non-atomically. */
template <>
struct DefaultDuplication<Kokkos::Experimental::HPX> {
  using type = Kokkos::Experimental::ScatterDuplicated;
};

template <>
struct DefaultContribution<Kokkos::Experimental::HPX,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};

template <>
struct DefaultContribution<Kokkos::Experimental::HPX,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
#endif
157 
#ifdef KOKKOS_ENABLE_THREADS
/* Kokkos::Threads defaults: duplicated view. Non-duplicated views contribute
   atomically, duplicated views contribute non-atomically. */
template <>
struct DefaultDuplication<Kokkos::Threads> {
  using type = Kokkos::Experimental::ScatterDuplicated;
};

template <>
struct DefaultContribution<Kokkos::Threads,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};

template <>
struct DefaultContribution<Kokkos::Threads,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterNonAtomic;
};
#endif
174 
#ifdef KOKKOS_ENABLE_CUDA
/* Kokkos::Cuda defaults: non-duplicated view; contributions are atomic for
   either duplication tag. */
template <>
struct DefaultDuplication<Kokkos::Cuda> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};

template <>
struct DefaultContribution<Kokkos::Cuda,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};

template <>
struct DefaultContribution<Kokkos::Cuda,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
191 
#ifdef KOKKOS_ENABLE_HIP
/* Kokkos::Experimental::HIP defaults: non-duplicated view; contributions are
   atomic for either duplication tag. */
template <>
struct DefaultDuplication<Kokkos::Experimental::HIP> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};

template <>
struct DefaultContribution<Kokkos::Experimental::HIP,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};

template <>
struct DefaultContribution<Kokkos::Experimental::HIP,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
208 
#ifdef KOKKOS_ENABLE_SYCL
/* Kokkos::Experimental::SYCL defaults: non-duplicated view; contributions are
   atomic for either duplication tag. */
template <>
struct DefaultDuplication<Kokkos::Experimental::SYCL> {
  using type = Kokkos::Experimental::ScatterNonDuplicated;
};

template <>
struct DefaultContribution<Kokkos::Experimental::SYCL,
                           Kokkos::Experimental::ScatterNonDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};

template <>
struct DefaultContribution<Kokkos::Experimental::SYCL,
                           Kokkos::Experimental::ScatterDuplicated> {
  using type = Kokkos::Experimental::ScatterAtomic;
};
#endif
225 
// FIXME All of the ScatterValue specializations need an overhaul:
//  - should they be copyable at all?
//  - what is the internal handle type?
//  - remove join()
//  - consistently use the update() function in the operators
//
// Primary template: declared only. One specialization exists per
// (reduction tag, contribution tag) pair.
template <typename ValueType, typename Op, typename DeviceType,
          typename Contribution>
struct ScatterValue;
234 
235 /* ScatterValue <Op=ScatterSum, Contribution=ScatterNonAtomic> is
236  the object returned by the access operator() of ScatterAccess. This class
237  inherits from the Sum<> reducer and it wraps join(dest, src) with convenient
238  operator+=, etc. Note the addition of update(ValueType const& rhs) and
239  reset() so that all reducers can have common functions See ReduceDuplicates
240  and ResetDuplicates ) */
241 template <typename ValueType, typename DeviceType>
242 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
243  Kokkos::Experimental::ScatterNonAtomic> {
244  ValueType& value;
245 
246  public:
247  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
248  : value(value_in) {}
249  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
250  : value(other.value) {}
251  KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
252  update(rhs);
253  }
254  KOKKOS_FORCEINLINE_FUNCTION void operator++() { update(1); }
255  KOKKOS_FORCEINLINE_FUNCTION void operator++(int) { update(1); }
256  KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
257  update(ValueType(-rhs));
258  }
259  KOKKOS_FORCEINLINE_FUNCTION void operator--() { update(ValueType(-1)); }
260  KOKKOS_FORCEINLINE_FUNCTION void operator--(int) { update(ValueType(-1)); }
261  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
262  value += rhs;
263  }
264  KOKKOS_FORCEINLINE_FUNCTION void reset() {
265  value = reduction_identity<ValueType>::sum();
266  }
267 };
268 
269 /* ScatterValue <Op=ScatterSum, Contribution=ScatterAtomic> is the
270  object returned by the access operator() of ScatterAccess. This class inherits
271  from the Sum<> reducer, and similar to that returned by an Atomic View, it
272  wraps Kokkos::atomic_add with convenient operator+=, etc. This version also has
273  the update(rhs) and reset() functions. */
274 template <typename ValueType, typename DeviceType>
275 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
276  Kokkos::Experimental::ScatterAtomic> {
277  ValueType& value;
278 
279  public:
280  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
281  : value(value_in) {}
282 
283  KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
284  this->join(value, rhs);
285  }
286  KOKKOS_FORCEINLINE_FUNCTION void operator++() { this->join(value, 1); }
287  KOKKOS_FORCEINLINE_FUNCTION void operator++(int) { this->join(value, 1); }
288  KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
289  this->join(value, ValueType(-rhs));
290  }
291  KOKKOS_FORCEINLINE_FUNCTION void operator--() {
292  this->join(value, ValueType(-1));
293  }
294  KOKKOS_FORCEINLINE_FUNCTION void operator--(int) {
295  this->join(value, ValueType(-1));
296  }
297 
298  KOKKOS_INLINE_FUNCTION
299  void join(ValueType& dest, const ValueType& src) const {
300  Kokkos::atomic_add(&dest, src);
301  }
302 
303  KOKKOS_INLINE_FUNCTION
304  void join(volatile ValueType& dest, const volatile ValueType& src) const {
305  Kokkos::atomic_add(&dest, src);
306  }
307 
308  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
309  this->join(value, rhs);
310  }
311 
312  KOKKOS_FORCEINLINE_FUNCTION void reset() {
313  value = reduction_identity<ValueType>::sum();
314  }
315 };
316 
317 /* ScatterValue <Op=ScatterProd, Contribution=ScatterNonAtomic> is
318  the object returned by the access operator() of ScatterAccess. This class
319  inherits from the Prod<> reducer, and it wraps join(dest, src) with
320  convenient operator*=, etc. Note the addition of update(ValueType const& rhs)
321  and reset() so that all reducers can have common functions See
322  ReduceDuplicates and ResetDuplicates ) */
323 template <typename ValueType, typename DeviceType>
324 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
325  Kokkos::Experimental::ScatterNonAtomic> {
326  ValueType& value;
327 
328  public:
329  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
330  : value(value_in) {}
331  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
332  : value(other.value) {}
333  KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
334  value *= rhs;
335  }
336  KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
337  value /= rhs;
338  }
339 
340  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
341  value *= rhs;
342  }
343  KOKKOS_FORCEINLINE_FUNCTION void reset() {
344  value = reduction_identity<ValueType>::prod();
345  }
346 };
347 
348 /* ScatterValue <Op=ScatterProd, Contribution=ScatterAtomic> is the
349  object returned by the access operator() of ScatterAccess. This class
350  inherits from the Prod<> reducer, and similar to that returned by an Atomic
351  View, it wraps and atomic_prod with convenient operator*=, etc. atomic_prod
352  uses the atomic_compare_exchange. This version also has the update(rhs)
353  and reset() functions. */
354 template <typename ValueType, typename DeviceType>
355 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
356  Kokkos::Experimental::ScatterAtomic> {
357  ValueType& value;
358 
359  public:
360  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
361  : value(value_in) {}
362  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
363  : value(other.value) {}
364 
365  KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
366  Kokkos::atomic_mul(&value, rhs);
367  }
368  KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
369  Kokkos::atomic_div(&value, rhs);
370  }
371 
372  KOKKOS_FORCEINLINE_FUNCTION
373  void atomic_prod(ValueType& dest, const ValueType& src) const {
374  bool success = false;
375  while (!success) {
376  ValueType dest_old = dest;
377  ValueType dest_new = dest_old * src;
378  dest_new =
379  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
380  success = ((dest_new - dest_old) / dest_old <= 1e-15);
381  }
382  }
383 
384  KOKKOS_INLINE_FUNCTION
385  void join(ValueType& dest, const ValueType& src) const {
386  atomic_prod(&dest, src);
387  }
388 
389  KOKKOS_INLINE_FUNCTION
390  void join(volatile ValueType& dest, const volatile ValueType& src) const {
391  atomic_prod(&dest, src);
392  }
393 
394  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
395  atomic_prod(&value, rhs);
396  }
397  KOKKOS_FORCEINLINE_FUNCTION void reset() {
398  value = reduction_identity<ValueType>::prod();
399  }
400 };
401 
402 /* ScatterValue <Op=ScatterMin, Contribution=ScatterNonAtomic> is
403  the object returned by the access operator() of ScatterAccess. This class
404  inherits from the Min<> reducer and it wraps join(dest, src) with convenient
405  update(rhs). Note the addition of update(ValueType const& rhs) and reset()
406  are so that all reducers can have a common update function See
407  ReduceDuplicates and ResetDuplicates ) */
408 template <typename ValueType, typename DeviceType>
409 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
410  Kokkos::Experimental::ScatterNonAtomic> {
411  ValueType& value;
412  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
413  : value(value_in) {}
414  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
415  : value(other.value) {}
416 
417  public:
418  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
419  value = rhs < value ? rhs : value;
420  }
421  KOKKOS_FORCEINLINE_FUNCTION void reset() {
422  value = reduction_identity<ValueType>::min();
423  }
424 };
425 
426 /* ScatterValue <Op=ScatterMin, Contribution=ScatterAtomic> is the
427  object returned by the access operator() of ScatterAccess. This class
428  inherits from the Min<> reducer, and similar to that returned by an Atomic
429  View, it wraps atomic_min with join(), etc. atomic_min uses the
430  atomic_compare_exchange. This version also has the update(rhs) and reset()
431  functions. */
432 template <typename ValueType, typename DeviceType>
433 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
434  Kokkos::Experimental::ScatterAtomic> {
435  ValueType& value;
436 
437  public:
438  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
439  : value(value_in) {}
440  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
441  : value(other.value) {}
442 
443  KOKKOS_FORCEINLINE_FUNCTION
444  void atomic_min(ValueType& dest, const ValueType& src) const {
445  bool success = false;
446  while (!success) {
447  ValueType dest_old = dest;
448  ValueType dest_new = (dest_old > src) ? src : dest_old;
449  dest_new =
450  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
451  success = ((dest_new - dest_old) / dest_old <= 1e-15);
452  }
453  }
454 
455  KOKKOS_INLINE_FUNCTION
456  void join(ValueType& dest, const ValueType& src) const {
457  atomic_min(dest, src);
458  }
459 
460  KOKKOS_INLINE_FUNCTION
461  void join(volatile ValueType& dest, const volatile ValueType& src) const {
462  atomic_min(dest, src);
463  }
464 
465  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
466  this->join(value, rhs);
467  }
468  KOKKOS_FORCEINLINE_FUNCTION void reset() {
469  value = reduction_identity<ValueType>::min();
470  }
471 };
472 
473 /* ScatterValue <Op=ScatterMax, Contribution=ScatterNonAtomic> is
474  the object returned by the access operator() of ScatterAccess. This class
475  inherits from the Max<> reducer and it wraps join(dest, src) with convenient
476  update(rhs). Note the addition of update(ValueType const& rhs) and reset()
477  are so that all reducers can have a common update function See
478  ReduceDuplicates and ResetDuplicates ) */
479 template <typename ValueType, typename DeviceType>
480 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
481  Kokkos::Experimental::ScatterNonAtomic> {
482  ValueType& value;
483 
484  public:
485  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
486  : value(value_in) {}
487  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
488  : value(other.value) {}
489  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
490  value = rhs > value ? rhs : value;
491  }
492  KOKKOS_FORCEINLINE_FUNCTION void reset() {
493  value = reduction_identity<ValueType>::max();
494  }
495 };
496 
497 /* ScatterValue <Op=ScatterMax, Contribution=ScatterAtomic> is the
498  object returned by the access operator() of ScatterAccess. This class
499  inherits from the Max<> reducer, and similar to that returned by an Atomic
500  View, it wraps atomic_max with join(), etc. atomic_max uses the
501  atomic_compare_exchange. This version also has the update(rhs) and reset()
502  functions. */
503 template <typename ValueType, typename DeviceType>
504 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
505  Kokkos::Experimental::ScatterAtomic> {
506  ValueType& value;
507 
508  public:
509  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
510  : value(value_in) {}
511  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
512  : value(other.value) {}
513 
514  KOKKOS_FORCEINLINE_FUNCTION
515  void atomic_max(ValueType& dest, const ValueType& src) const {
516  bool success = false;
517  while (!success) {
518  ValueType dest_old = dest;
519  ValueType dest_new = (dest_old < src) ? src : dest_old;
520  dest_new =
521  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
522  success = ((dest_new - dest_old) / dest_old <= 1e-15);
523  }
524  }
525 
526  KOKKOS_INLINE_FUNCTION
527  void join(ValueType& dest, const ValueType& src) const {
528  atomic_max(dest, src);
529  }
530 
531  KOKKOS_INLINE_FUNCTION
532  void join(volatile ValueType& dest, const volatile ValueType& src) const {
533  atomic_max(dest, src);
534  }
535 
536  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
537  this->join(value, rhs);
538  }
539  KOKKOS_FORCEINLINE_FUNCTION void reset() {
540  value = reduction_identity<ValueType>::max();
541  }
542 };
543 
544 /* DuplicatedDataType, given a View DataType, will create a new DataType
545  that has a new runtime dimension which becomes the largest-stride dimension.
546  In the case of LayoutLeft, due to the limitation induced by the design of
547  DataType itself, it must convert any existing compile-time dimensions into
548  runtime dimensions. */
549 template <typename T, typename Layout>
550 struct DuplicatedDataType;
551 
552 template <typename T>
553 struct DuplicatedDataType<T, Kokkos::LayoutRight> {
554  using value_type = T*; // For LayoutRight, add a star all the way on the left
555 };
556 
557 template <typename T, size_t N>
558 struct DuplicatedDataType<T[N], Kokkos::LayoutRight> {
559  using value_type =
560  typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type[N];
561 };
562 
563 template <typename T>
564 struct DuplicatedDataType<T[], Kokkos::LayoutRight> {
565  using value_type =
566  typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type[];
567 };
568 
569 template <typename T>
570 struct DuplicatedDataType<T*, Kokkos::LayoutRight> {
571  using value_type =
572  typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type*;
573 };
574 
575 template <typename T>
576 struct DuplicatedDataType<T, Kokkos::LayoutLeft> {
577  using value_type = T*;
578 };
579 
580 template <typename T, size_t N>
581 struct DuplicatedDataType<T[N], Kokkos::LayoutLeft> {
582  using value_type =
583  typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
584 };
585 
586 template <typename T>
587 struct DuplicatedDataType<T[], Kokkos::LayoutLeft> {
588  using value_type =
589  typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
590 };
591 
592 template <typename T>
593 struct DuplicatedDataType<T*, Kokkos::LayoutLeft> {
594  using value_type =
595  typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
596 };
597 
/* Copy a pack of integer arguments into consecutive slots of `array`,
   beginning at index `pos`. The caller must provide room for every argument;
   no bounds checking is done. */

// One argument left: store it in array[pos].
template <class T>
void args_to_array(size_t* array, int pos, T dim0) {
  array[pos] = dim0;
}

// Several arguments: store the first one via the single-argument overload,
// then continue with the remaining ones one slot further on.
template <class T, class... Dims>
void args_to_array(size_t* array, int pos, T dim0, Dims... dims) {
  args_to_array(array, pos, dim0);
  int const next_slot = pos + 1;
  args_to_array(array, next_slot, dims...);
}
609 
610 /* Slice is just responsible for stuffing the correct number of Kokkos::ALL
611  arguments on the correct side of the index in a call to subview() to get a
612  subview where the index specified is the largest-stride one. */
613 template <typename Layout, int rank, typename V, typename... Args>
614 struct Slice {
615  using next = Slice<Layout, rank - 1, V, Kokkos::Impl::ALL_t, Args...>;
616  using value_type = typename next::value_type;
617 
618  static value_type get(V const& src, const size_t i, Args... args) {
619  return next::get(src, i, Kokkos::ALL, args...);
620  }
621 };
622 
623 template <typename V, typename... Args>
624 struct Slice<Kokkos::LayoutRight, 1, V, Args...> {
625  using value_type =
626  typename Kokkos::Impl::ViewMapping<void, V, const size_t, Args...>::type;
627  static value_type get(V const& src, const size_t i, Args... args) {
628  return Kokkos::subview(src, i, args...);
629  }
630 };
631 
632 template <typename V, typename... Args>
633 struct Slice<Kokkos::LayoutLeft, 1, V, Args...> {
634  using value_type =
635  typename Kokkos::Impl::ViewMapping<void, V, Args..., const size_t>::type;
636  static value_type get(V const& src, const size_t i, Args... args) {
637  return Kokkos::subview(src, args..., i);
638  }
639 };
640 
641 template <typename ExecSpace, typename ValueType, typename Op>
642 struct ReduceDuplicates;
643 
644 template <typename ExecSpace, typename ValueType, typename Op>
645 struct ReduceDuplicatesBase {
646  using Derived = ReduceDuplicates<ExecSpace, ValueType, Op>;
647  ValueType const* src;
648  ValueType* dst;
649  size_t stride;
650  size_t start;
651  size_t n;
652  ReduceDuplicatesBase(ExecSpace const& exec_space, ValueType const* src_in,
653  ValueType* dest_in, size_t stride_in, size_t start_in,
654  size_t n_in, std::string const& name)
655  : src(src_in), dst(dest_in), stride(stride_in), start(start_in), n(n_in) {
656  parallel_for(
657  std::string("Kokkos::ScatterView::ReduceDuplicates [") + name + "]",
658  RangePolicy<ExecSpace, size_t>(exec_space, 0, stride),
659  static_cast<Derived const&>(*this));
660  }
661 };
662 
663 /* ReduceDuplicates -- Perform reduction on destination array using strided
664  * source Use ScatterValue<> specific to operation to wrap destination array so
665  * that the reduction operation can be accessed via the update(rhs) function */
666 template <typename ExecSpace, typename ValueType, typename Op>
667 struct ReduceDuplicates
668  : public ReduceDuplicatesBase<ExecSpace, ValueType, Op> {
669  using Base = ReduceDuplicatesBase<ExecSpace, ValueType, Op>;
670  ReduceDuplicates(ExecSpace const& exec_space, ValueType const* src_in,
671  ValueType* dst_in, size_t stride_in, size_t start_in,
672  size_t n_in, std::string const& name)
673  : Base(exec_space, src_in, dst_in, stride_in, start_in, n_in, name) {}
674  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
675  for (size_t j = Base::start; j < Base::n; ++j) {
676  ScatterValue<ValueType, Op, ExecSpace,
677  Kokkos::Experimental::ScatterNonAtomic>
678  sv(Base::dst[i]);
679  sv.update(Base::src[i + Base::stride * j]);
680  }
681  }
682 };
683 
684 template <typename ExecSpace, typename ValueType, typename Op>
685 struct ResetDuplicates;
686 
687 template <typename ExecSpace, typename ValueType, typename Op>
688 struct ResetDuplicatesBase {
689  using Derived = ResetDuplicates<ExecSpace, ValueType, Op>;
690  ValueType* data;
691  ResetDuplicatesBase(ExecSpace const& exec_space, ValueType* data_in,
692  size_t size_in, std::string const& name)
693  : data(data_in) {
694  parallel_for(
695  std::string("Kokkos::ScatterView::ResetDuplicates [") + name + "]",
696  RangePolicy<ExecSpace, size_t>(exec_space, 0, size_in),
697  static_cast<Derived const&>(*this));
698  }
699 };
700 
701 /* ResetDuplicates -- Perform reset on destination array
702  * Use ScatterValue<> specific to operation to wrap destination array so that
703  * the reset operation can be accessed via the reset() function */
704 template <typename ExecSpace, typename ValueType, typename Op>
705 struct ResetDuplicates : public ResetDuplicatesBase<ExecSpace, ValueType, Op> {
706  using Base = ResetDuplicatesBase<ExecSpace, ValueType, Op>;
707  ResetDuplicates(ExecSpace const& exec_space, ValueType* data_in,
708  size_t size_in, std::string const& name)
709  : Base(exec_space, data_in, size_in, name) {}
710  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
711  ScatterValue<ValueType, Op, ExecSpace,
712  Kokkos::Experimental::ScatterNonAtomic>
713  sv(Base::data[i]);
714  sv.reset();
715  }
716 };
717 
718 template <typename... P>
719 void check_scatter_view_allocation_properties_argument(
720  ViewCtorProp<P...> const&) {
721  static_assert(ViewCtorProp<P...>::has_execution_space &&
722  ViewCtorProp<P...>::has_label &&
723  ViewCtorProp<P...>::initialize,
724  "Allocation property must have an execution name as well as a "
725  "label, and must perform the view initialization");
726 }
727 
728 } // namespace Experimental
729 } // namespace Impl
730 } // namespace Kokkos
731 
732 namespace Kokkos {
733 namespace Experimental {
734 
735 template <typename DataType,
736  typename Layout = Kokkos::DefaultExecutionSpace::array_layout,
737  typename DeviceType = Kokkos::DefaultExecutionSpace,
738  typename Op = Kokkos::Experimental::ScatterSum,
739  typename Duplication = typename Kokkos::Impl::Experimental::
740  DefaultDuplication<typename DeviceType::execution_space>::type,
741  typename Contribution =
742  typename Kokkos::Impl::Experimental::DefaultContribution<
743  typename DeviceType::execution_space, Duplication>::type>
744 class ScatterView;
745 
746 template <typename DataType, typename Op, typename DeviceType, typename Layout,
747  typename Duplication, typename Contribution,
748  typename OverrideContribution>
749 class ScatterAccess;
750 
751 // non-duplicated implementation
752 template <typename DataType, typename Op, typename DeviceType, typename Layout,
753  typename Contribution>
754 class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
755  Contribution> {
756  public:
757  using execution_space = typename DeviceType::execution_space;
758  using memory_space = typename DeviceType::memory_space;
759  using device_type = Kokkos::Device<execution_space, memory_space>;
760  using original_view_type = Kokkos::View<DataType, Layout, device_type>;
761  using original_value_type = typename original_view_type::value_type;
762  using original_reference_type = typename original_view_type::reference_type;
763  friend class ScatterAccess<DataType, Op, DeviceType, Layout,
764  ScatterNonDuplicated, Contribution,
765  ScatterNonAtomic>;
766  friend class ScatterAccess<DataType, Op, DeviceType, Layout,
767  ScatterNonDuplicated, Contribution, ScatterAtomic>;
768  template <class, class, class, class, class, class>
769  friend class ScatterView;
770 
771  ScatterView() = default;
772 
773  template <typename RT, typename... RP>
774  ScatterView(View<RT, RP...> const& original_view)
775  : internal_view(original_view) {}
776 
777  template <typename RT, typename... P, typename... RP>
778  ScatterView(execution_space const& /* exec_space */,
779  View<RT, RP...> const& original_view)
780  : internal_view(original_view) {}
781 
782  template <typename... Dims>
783  ScatterView(std::string const& name, Dims... dims)
784  : internal_view(name, dims...) {}
785 
786  // This overload allows specifying an execution space instance to be
787  // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
788  // first argument.
789  template <typename... P, typename... Dims>
790  ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, Dims... dims)
791  : internal_view(arg_prop, dims...) {
792  using ::Kokkos::Impl::Experimental::
793  check_scatter_view_allocation_properties_argument;
794  check_scatter_view_allocation_properties_argument(arg_prop);
795  }
796 
797  template <typename OtherDataType, typename OtherDeviceType>
798  KOKKOS_FUNCTION ScatterView(
799  const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
800  ScatterNonDuplicated, Contribution>& other_view)
801  : internal_view(other_view.internal_view) {}
802 
803  template <typename OtherDataType, typename OtherDeviceType>
804  KOKKOS_FUNCTION void operator=(
805  const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
806  ScatterNonDuplicated, Contribution>& other_view) {
807  internal_view = other_view.internal_view;
808  }
809 
810  template <typename OverrideContribution = Contribution>
811  KOKKOS_FORCEINLINE_FUNCTION
812  ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
813  Contribution, OverrideContribution>
814  access() const {
815  return ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
816  Contribution, OverrideContribution>(*this);
817  }
818 
819  original_view_type subview() const { return internal_view; }
820 
821  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
822  return internal_view.is_allocated();
823  }
824 
825  template <typename DT, typename... RP>
826  void contribute_into(View<DT, RP...> const& dest) const {
827  contribute_into(execution_space(), dest);
828  }
829 
830  template <typename DT, typename... RP>
831  void contribute_into(execution_space const& exec_space,
832  View<DT, RP...> const& dest) const {
833  using dest_type = View<DT, RP...>;
834  static_assert(std::is_same<typename dest_type::array_layout, Layout>::value,
835  "ScatterView contribute destination has different layout");
836  static_assert(
838  execution_space, typename dest_type::memory_space>::accessible,
839  "ScatterView contribute destination memory space not accessible");
840  if (dest.data() == internal_view.data()) return;
841  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
842  original_value_type, Op>(
843  exec_space, internal_view.data(), dest.data(), 0, 0, 1,
844  internal_view.label());
845  }
846 
847  void reset(execution_space const& exec_space = execution_space()) {
848  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
849  original_value_type, Op>(
850  exec_space, internal_view.data(), internal_view.size(),
851  internal_view.label());
852  }
853  template <typename DT, typename... RP>
854  void reset_except(View<DT, RP...> const& view) {
855  reset_except(execution_space(), view);
856  }
857 
858  template <typename DT, typename... RP>
859  void reset_except(const execution_space& exec_space,
860  View<DT, RP...> const& view) {
861  if (view.data() != internal_view.data()) reset(exec_space);
862  }
863 
864  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
865  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
866  const size_t n6 = 0, const size_t n7 = 0) {
867  ::Kokkos::resize(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
868  }
869 
870  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
871  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
872  const size_t n6 = 0, const size_t n7 = 0) {
873  ::Kokkos::realloc(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
874  }
875 
876  protected:
877  template <typename... Args>
878  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(Args... args) const {
879  return internal_view(args...);
880  }
881 
882  private:
883  using internal_view_type = original_view_type;
884  internal_view_type internal_view;
885 };
886 
887 template <typename DataType, typename Op, typename DeviceType, typename Layout,
888  typename Contribution, typename OverrideContribution>
889 class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
890  Contribution, OverrideContribution> {
891  public:
892  using view_type = ScatterView<DataType, Layout, DeviceType, Op,
893  ScatterNonDuplicated, Contribution>;
894  using original_value_type = typename view_type::original_value_type;
895  using value_type = Kokkos::Impl::Experimental::ScatterValue<
896  original_value_type, Op, DeviceType, OverrideContribution>;
897 
898  KOKKOS_INLINE_FUNCTION
899  ScatterAccess() : view(view_type()) {}
900 
901  KOKKOS_INLINE_FUNCTION
902  ScatterAccess(view_type const& view_in) : view(view_in) {}
903  KOKKOS_DEFAULTED_FUNCTION
904  ~ScatterAccess() = default;
905 
906  template <typename... Args>
907  KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
908  return view.at(args...);
909  }
910 
911  template <typename Arg>
912  KOKKOS_FORCEINLINE_FUNCTION
913  typename std::enable_if<view_type::original_view_type::rank == 1 &&
914  std::is_integral<Arg>::value,
915  value_type>::type
916  operator[](Arg arg) const {
917  return view.at(arg);
918  }
919 
920  private:
921  view_type const& view;
922 };
923 
924 // duplicated implementation
925 // LayoutLeft and LayoutRight are different enough that we'll just specialize
926 // each
927 
928 template <typename DataType, typename Op, typename DeviceType,
929  typename Contribution>
930 class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op,
931  ScatterDuplicated, Contribution> {
932  public:
933  using execution_space = typename DeviceType::execution_space;
934  using memory_space = typename DeviceType::memory_space;
935  using device_type = Kokkos::Device<execution_space, memory_space>;
936  using original_view_type =
938  using original_value_type = typename original_view_type::value_type;
939  using original_reference_type = typename original_view_type::reference_type;
940  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
941  ScatterDuplicated, Contribution, ScatterNonAtomic>;
942  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
943  ScatterDuplicated, Contribution, ScatterAtomic>;
944  template <class, class, class, class, class, class>
945  friend class ScatterView;
946 
947  using data_type_info =
948  typename Kokkos::Impl::Experimental::DuplicatedDataType<
949  DataType, Kokkos::LayoutRight>;
950  using internal_data_type = typename data_type_info::value_type;
951  using internal_view_type =
952  Kokkos::View<internal_data_type, Kokkos::LayoutRight, device_type>;
953 
954  ScatterView() = default;
955 
956  template <typename OtherDataType, typename OtherDeviceType>
957  KOKKOS_FUNCTION ScatterView(
958  const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
959  ScatterDuplicated, Contribution>& other_view)
960  : unique_token(other_view.unique_token),
961  internal_view(other_view.internal_view) {}
962 
963  template <typename OtherDataType, typename OtherDeviceType>
964  KOKKOS_FUNCTION void operator=(
965  const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
966  ScatterDuplicated, Contribution>& other_view) {
967  unique_token = other_view.unique_token;
968  internal_view = other_view.internal_view;
969  }
970 
971  template <typename RT, typename... RP>
972  ScatterView(View<RT, RP...> const& original_view)
973  : ScatterView(execution_space(), original_view) {}
974 
975  template <typename RT, typename... P, typename... RP>
976  ScatterView(execution_space const& exec_space,
977  View<RT, RP...> const& original_view)
978  : unique_token(),
979  internal_view(
980  view_alloc(WithoutInitializing,
981  std::string("duplicated_") + original_view.label(),
982  exec_space),
983  unique_token.size(),
984  original_view.rank_dynamic > 0 ? original_view.extent(0)
985  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
986  original_view.rank_dynamic > 1 ? original_view.extent(1)
987  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
988  original_view.rank_dynamic > 2 ? original_view.extent(2)
989  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
990  original_view.rank_dynamic > 3 ? original_view.extent(3)
991  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
992  original_view.rank_dynamic > 4 ? original_view.extent(4)
993  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
994  original_view.rank_dynamic > 5 ? original_view.extent(5)
995  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
996  original_view.rank_dynamic > 6 ? original_view.extent(6)
997  : KOKKOS_IMPL_CTOR_DEFAULT_ARG)
998 
999  {
1000  reset(exec_space);
1001  }
1002 
1003  template <typename... Dims>
1004  ScatterView(std::string const& name, Dims... dims)
1005  : ScatterView(view_alloc(execution_space(), name), dims...) {}
1006 
1007  // This overload allows specifying an execution space instance to be
1008  // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
1009  // first argument.
1010  template <typename... P, typename... Dims>
1011  ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, Dims... dims)
1012  : internal_view(view_alloc(WithoutInitializing,
1013  static_cast<::Kokkos::Impl::ViewCtorProp<
1014  void, std::string> const&>(arg_prop)
1015  .value),
1016  unique_token.size(), dims...) {
1017  using ::Kokkos::Impl::Experimental::
1018  check_scatter_view_allocation_properties_argument;
1019  check_scatter_view_allocation_properties_argument(arg_prop);
1020 
1021  auto const exec_space =
1022  static_cast<::Kokkos::Impl::ViewCtorProp<void, execution_space> const&>(
1023  arg_prop)
1024  .value;
1025  reset(exec_space);
1026  }
1027 
1028  template <typename OverrideContribution = Contribution>
1029  KOKKOS_FORCEINLINE_FUNCTION
1030  ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
1031  ScatterDuplicated, Contribution, OverrideContribution>
1032  access() const {
1033  return ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
1034  ScatterDuplicated, Contribution, OverrideContribution>(
1035  *this);
1036  }
1037 
1038  typename Kokkos::Impl::Experimental::Slice<Kokkos::LayoutRight,
1039  internal_view_type::rank,
1040  internal_view_type>::value_type
1041  subview() const {
1042  return Kokkos::Impl::Experimental::Slice<
1043  Kokkos::LayoutRight, internal_view_type::Rank,
1044  internal_view_type>::get(internal_view, 0);
1045  }
1046 
1047  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
1048  return internal_view.is_allocated();
1049  }
1050 
1051  template <typename DT, typename... RP>
1052  void contribute_into(View<DT, RP...> const& dest) const {
1053  contribute_into(execution_space(), dest);
1054  }
1055 
1056  template <typename DT, typename... RP>
1057  void contribute_into(execution_space const& exec_space,
1058  View<DT, RP...> const& dest) const {
1059  using dest_type = View<DT, RP...>;
1060  static_assert(std::is_same<typename dest_type::array_layout,
1061  Kokkos::LayoutRight>::value,
1062  "ScatterView deep_copy destination has different layout");
1063  static_assert(
1065  execution_space, typename dest_type::memory_space>::accessible,
1066  "ScatterView deep_copy destination memory space not accessible");
1067  bool is_equal = (dest.data() == internal_view.data());
1068  size_t start = is_equal ? 1 : 0;
1069  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
1070  original_value_type, Op>(
1071  exec_space, internal_view.data(), dest.data(), internal_view.stride(0),
1072  start, internal_view.extent(0), internal_view.label());
1073  }
1074 
1075  void reset(execution_space const& exec_space = execution_space()) {
1076  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1077  original_value_type, Op>(
1078  exec_space, internal_view.data(), internal_view.size(),
1079  internal_view.label());
1080  }
1081 
1082  template <typename DT, typename... RP>
1083  void reset_except(View<DT, RP...> const& view) {
1084  reset_except(execution_space(), view);
1085  }
1086 
1087  template <typename DT, typename... RP>
1088  void reset_except(execution_space const& exec_space,
1089  View<DT, RP...> const& view) {
1090  if (view.data() != internal_view.data()) {
1091  reset(exec_space);
1092  return;
1093  }
1094  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1095  original_value_type, Op>(
1096  exec_space, internal_view.data() + view.size(),
1097  internal_view.size() - view.size(), internal_view.label());
1098  }
1099 
1100  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1101  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1102  const size_t n6 = 0) {
1103  ::Kokkos::resize(internal_view, unique_token.size(), n0, n1, n2, n3, n4, n5,
1104  n6);
1105  }
1106 
1107  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1108  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1109  const size_t n6 = 0) {
1110  ::Kokkos::realloc(internal_view, unique_token.size(), n0, n1, n2, n3, n4,
1111  n5, n6);
1112  }
1113 
1114  protected:
1115  template <typename... Args>
1116  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int rank,
1117  Args... args) const {
1118  return internal_view(rank, args...);
1119  }
1120 
1121  protected:
1122  using unique_token_type = Kokkos::Experimental::UniqueToken<
1123  execution_space, Kokkos::Experimental::UniqueTokenScope::Global>;
1124 
1125  unique_token_type unique_token;
1126  internal_view_type internal_view;
1127 };
1128 
1129 template <typename DataType, typename Op, typename DeviceType,
1130  typename Contribution>
1131 class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op,
1132  ScatterDuplicated, Contribution> {
1133  public:
1134  using execution_space = typename DeviceType::execution_space;
1135  using memory_space = typename DeviceType::memory_space;
1136  using device_type = Kokkos::Device<execution_space, memory_space>;
1137  using original_view_type =
1139  using original_value_type = typename original_view_type::value_type;
1140  using original_reference_type = typename original_view_type::reference_type;
1141  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1142  ScatterDuplicated, Contribution, ScatterNonAtomic>;
1143  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1144  ScatterDuplicated, Contribution, ScatterAtomic>;
1145  template <class, class, class, class, class, class>
1146  friend class ScatterView;
1147 
1148  using data_type_info =
1149  typename Kokkos::Impl::Experimental::DuplicatedDataType<
1150  DataType, Kokkos::LayoutLeft>;
1151  using internal_data_type = typename data_type_info::value_type;
1152  using internal_view_type =
1153  Kokkos::View<internal_data_type, Kokkos::LayoutLeft, device_type>;
1154 
1155  ScatterView() = default;
1156 
1157  template <typename RT, typename... RP>
1158  ScatterView(View<RT, RP...> const& original_view)
1159  : ScatterView(execution_space(), original_view) {}
1160 
1161  template <typename RT, typename... P, typename... RP>
1162  ScatterView(execution_space const& exec_space,
1163  View<RT, RP...> const& original_view)
1164  : unique_token() {
1165  size_t arg_N[8] = {original_view.rank > 0 ? original_view.extent(0)
1166  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1167  original_view.rank > 1 ? original_view.extent(1)
1168  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1169  original_view.rank > 2 ? original_view.extent(2)
1170  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1171  original_view.rank > 3 ? original_view.extent(3)
1172  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1173  original_view.rank > 4 ? original_view.extent(4)
1174  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1175  original_view.rank > 5 ? original_view.extent(5)
1176  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1177  original_view.rank > 6 ? original_view.extent(6)
1178  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1179  KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1180  arg_N[internal_view_type::rank - 1] = unique_token.size();
1181  internal_view = internal_view_type(
1182  view_alloc(WithoutInitializing,
1183  std::string("duplicated_") + original_view.label(),
1184  exec_space),
1185  arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6],
1186  arg_N[7]);
1187  reset(exec_space);
1188  }
1189 
1190  template <typename... Dims>
1191  ScatterView(std::string const& name, Dims... dims)
1192  : ScatterView(view_alloc(execution_space(), name), dims...) {}
1193 
1194  // This overload allows specifying an execution space instance to be
1195  // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
1196  // first argument.
1197  template <typename... P, typename... Dims>
1198  ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop,
1199  Dims... dims) {
1200  using ::Kokkos::Impl::Experimental::
1201  check_scatter_view_allocation_properties_argument;
1202  check_scatter_view_allocation_properties_argument(arg_prop);
1203 
1204  original_view_type original_view;
1205  size_t arg_N[8] = {original_view.rank > 0 ? original_view.static_extent(0)
1206  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1207  original_view.rank > 1 ? original_view.static_extent(1)
1208  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1209  original_view.rank > 2 ? original_view.static_extent(2)
1210  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1211  original_view.rank > 3 ? original_view.static_extent(3)
1212  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1213  original_view.rank > 4 ? original_view.static_extent(4)
1214  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1215  original_view.rank > 5 ? original_view.static_extent(5)
1216  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1217  original_view.rank > 6 ? original_view.static_extent(6)
1218  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1219  KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1220  Kokkos::Impl::Experimental::args_to_array(arg_N, 0, dims...);
1221  arg_N[internal_view_type::rank - 1] = unique_token.size();
1222 
1223  auto const name =
1224  static_cast<::Kokkos::Impl::ViewCtorProp<void, std::string> const&>(
1225  arg_prop)
1226  .value;
1227  internal_view = internal_view_type(view_alloc(WithoutInitializing, name),
1228  arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1229  arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1230 
1231  auto const exec_space =
1232  static_cast<::Kokkos::Impl::ViewCtorProp<void, execution_space> const&>(
1233  arg_prop)
1234  .value;
1235  reset(exec_space);
1236  }
1237 
1238  template <typename OtherDataType, typename OtherDeviceType>
1239  KOKKOS_FUNCTION ScatterView(
1240  const ScatterView<OtherDataType, Kokkos::LayoutLeft, OtherDeviceType, Op,
1241  ScatterDuplicated, Contribution>& other_view)
1242  : unique_token(other_view.unique_token),
1243  internal_view(other_view.internal_view) {}
1244 
1245  template <typename OtherDataType, typename OtherDeviceType>
1246  KOKKOS_FUNCTION void operator=(
1247  const ScatterView<OtherDataType, Kokkos::LayoutLeft, OtherDeviceType, Op,
1248  ScatterDuplicated, Contribution>& other_view) {
1249  unique_token = other_view.unique_token;
1250  internal_view = other_view.internal_view;
1251  }
1252 
1253  template <typename OverrideContribution = Contribution>
1254  KOKKOS_FORCEINLINE_FUNCTION
1255  ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1256  ScatterDuplicated, Contribution, OverrideContribution>
1257  access() const {
1258  return ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1259  ScatterDuplicated, Contribution, OverrideContribution>(
1260  *this);
1261  }
1262 
1263  typename Kokkos::Impl::Experimental::Slice<Kokkos::LayoutLeft,
1264  internal_view_type::rank,
1265  internal_view_type>::value_type
1266  subview() const {
1267  return Kokkos::Impl::Experimental::Slice<
1268  Kokkos::LayoutLeft, internal_view_type::rank,
1269  internal_view_type>::get(internal_view, 0);
1270  }
1271 
1272  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
1273  return internal_view.is_allocated();
1274  }
1275 
1276  template <typename... RP>
1277  void contribute_into(View<RP...> const& dest) const {
1278  contribute_into(execution_space(), dest);
1279  }
1280 
1281  template <typename... RP>
1282  void contribute_into(execution_space const& exec_space,
1283  View<RP...> const& dest) const {
1284  using dest_type = View<RP...>;
1285  static_assert(
1286  std::is_same<typename dest_type::value_type,
1287  typename original_view_type::non_const_value_type>::value,
1288  "ScatterView deep_copy destination has wrong value_type");
1289  static_assert(std::is_same<typename dest_type::array_layout,
1290  Kokkos::LayoutLeft>::value,
1291  "ScatterView deep_copy destination has different layout");
1292  static_assert(
1294  execution_space, typename dest_type::memory_space>::accessible,
1295  "ScatterView deep_copy destination memory space not accessible");
1296  auto extent = internal_view.extent(internal_view_type::rank - 1);
1297  bool is_equal = (dest.data() == internal_view.data());
1298  size_t start = is_equal ? 1 : 0;
1299  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
1300  original_value_type, Op>(
1301  exec_space, internal_view.data(), dest.data(),
1302  internal_view.stride(internal_view_type::rank - 1), start, extent,
1303  internal_view.label());
1304  }
1305 
1306  void reset(execution_space const& exec_space = execution_space()) {
1307  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1308  original_value_type, Op>(
1309  exec_space, internal_view.data(), internal_view.size(),
1310  internal_view.label());
1311  }
1312 
1313  template <typename DT, typename... RP>
1314  void reset_except(View<DT, RP...> const& view) {
1315  reset_except(execution_space(), view);
1316  }
1317 
1318  template <typename DT, typename... RP>
1319  void reset_except(execution_space const& exec_space,
1320  View<DT, RP...> const& view) {
1321  if (view.data() != internal_view.data()) {
1322  reset(exec_space);
1323  return;
1324  }
1325  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1326  original_value_type, Op>(
1327  exec_space, internal_view.data() + view.size(),
1328  internal_view.size() - view.size(), internal_view.label());
1329  }
1330 
1331  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1332  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1333  const size_t n6 = 0) {
1334  size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1335  const int i = internal_view.rank - 1;
1336  arg_N[i] = unique_token.size();
1337 
1338  ::Kokkos::resize(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1339  arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1340  }
1341 
1342  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1343  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1344  const size_t n6 = 0) {
1345  size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1346  const int i = internal_view.rank - 1;
1347  arg_N[i] = unique_token.size();
1348 
1349  ::Kokkos::realloc(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1350  arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1351  }
1352 
1353  protected:
1354  template <typename... Args>
1355  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int thread_id,
1356  Args... args) const {
1357  return internal_view(args..., thread_id);
1358  }
1359 
1360  protected:
1361  using unique_token_type = Kokkos::Experimental::UniqueToken<
1362  execution_space, Kokkos::Experimental::UniqueTokenScope::Global>;
1363 
1364  unique_token_type unique_token;
1365  internal_view_type internal_view;
1366 };
1367 
1368 /* This object has to be separate in order to store the thread ID, which cannot
1369  be obtained until one is inside a parallel construct, and may be relatively
1370  expensive to obtain at every contribution
1371  (calls a non-inlined function, looks up a thread-local variable).
1372  Due to the expense, it is sensible to query it at most once per parallel
1373  iterate (ideally once per thread, but parallel_for doesn't expose that) and
1374  then store it in a stack variable.
1375  ScatterAccess serves as a non-const object on the stack which can store the
1376  thread ID */
1377 
1378 template <typename DataType, typename Op, typename DeviceType, typename Layout,
1379  typename Contribution, typename OverrideContribution>
1380 class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterDuplicated,
1381  Contribution, OverrideContribution> {
1382  public:
1383  using view_type = ScatterView<DataType, Layout, DeviceType, Op,
1384  ScatterDuplicated, Contribution>;
1385  using original_value_type = typename view_type::original_value_type;
1386  using value_type = Kokkos::Impl::Experimental::ScatterValue<
1387  original_value_type, Op, DeviceType, OverrideContribution>;
1388 
1389  KOKKOS_FORCEINLINE_FUNCTION
1390  ScatterAccess(view_type const& view_in)
1391  : view(view_in), thread_id(view_in.unique_token.acquire()) {}
1392 
1393  KOKKOS_FORCEINLINE_FUNCTION
1394  ~ScatterAccess() {
1395  if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id);
1396  }
1397 
1398  template <typename... Args>
1399  KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
1400  return view.at(thread_id, args...);
1401  }
1402 
1403  template <typename Arg>
1404  KOKKOS_FORCEINLINE_FUNCTION
1405  typename std::enable_if<view_type::original_view_type::rank == 1 &&
1406  std::is_integral<Arg>::value,
1407  value_type>::type
1408  operator[](Arg arg) const {
1409  return view.at(thread_id, arg);
1410  }
1411 
1412  private:
1413  view_type const& view;
1414 
1415  // simplify RAII by disallowing copies
1416  ScatterAccess(ScatterAccess const& other) = delete;
1417  ScatterAccess& operator=(ScatterAccess const& other) = delete;
1418  ScatterAccess& operator=(ScatterAccess&& other) = delete;
1419 
1420  public:
1421  // do need to allow moves though, for the common
1422  // auto b = a.access();
1423  // that assignments turns into a move constructor call
1424  KOKKOS_FORCEINLINE_FUNCTION
1425  ScatterAccess(ScatterAccess&& other)
1426  : view(other.view), thread_id(other.thread_id) {
1427  other.thread_id = ~thread_id_type(0);
1428  }
1429 
1430  private:
1431  using unique_token_type = typename view_type::unique_token_type;
1432  using thread_id_type = typename unique_token_type::size_type;
1433  thread_id_type thread_id;
1434 };
1435 
1436 template <typename Op = Kokkos::Experimental::ScatterSum,
1437  typename Duplication = void, typename Contribution = void,
1438  typename RT, typename... RP>
1439 ScatterView<
1440  RT, typename ViewTraits<RT, RP...>::array_layout,
1441  typename ViewTraits<RT, RP...>::device_type, Op,
1442  std::conditional_t<
1443  std::is_same<Duplication, void>::value,
1444  typename Kokkos::Impl::Experimental::DefaultDuplication<
1445  typename ViewTraits<RT, RP...>::execution_space>::type,
1446  Duplication>,
1447  std::conditional_t<
1448  std::is_same<Contribution, void>::value,
1449  typename Kokkos::Impl::Experimental::DefaultContribution<
1450  typename ViewTraits<RT, RP...>::execution_space,
1451  typename std::conditional_t<
1452  std::is_same<Duplication, void>::value,
1453  typename Kokkos::Impl::Experimental::DefaultDuplication<
1454  typename ViewTraits<RT, RP...>::execution_space>::type,
1455  Duplication>>::type,
1456  Contribution>>
1457 create_scatter_view(View<RT, RP...> const& original_view) {
1458  return original_view; // implicit ScatterView constructor call
1459 }
1460 
1461 template <typename Op, typename RT, typename... RP>
1462 ScatterView<
1463  RT, typename ViewTraits<RT, RP...>::array_layout,
1464  typename ViewTraits<RT, RP...>::device_type, Op,
1465  typename Kokkos::Impl::Experimental::DefaultDuplication<
1466  typename ViewTraits<RT, RP...>::execution_space>::type,
1467  typename Kokkos::Impl::Experimental::DefaultContribution<
1468  typename ViewTraits<RT, RP...>::execution_space,
1469  typename Kokkos::Impl::Experimental::DefaultDuplication<
1470  typename ViewTraits<RT, RP...>::execution_space>::type>::type>
1471 create_scatter_view(Op, View<RT, RP...> const& original_view) {
1472  return original_view; // implicit ScatterView constructor call
1473 }
1474 
1475 template <typename Op, typename Duplication, typename Contribution, typename RT,
1476  typename... RP>
1477 ScatterView<RT, typename ViewTraits<RT, RP...>::array_layout,
1478  typename ViewTraits<RT, RP...>::device_type, Op, Duplication,
1479  Contribution>
1480 create_scatter_view(Op, Duplication, Contribution,
1481  View<RT, RP...> const& original_view) {
1482  return original_view; // implicit ScatterView constructor call
1483 }
1484 
1485 } // namespace Experimental
1486 } // namespace Kokkos
1487 
1488 namespace Kokkos {
1489 namespace Experimental {
1490 
1491 template <typename DT1, typename DT2, typename LY, typename ES, typename OP,
1492  typename CT, typename DP, typename... VP>
1493 void contribute(
1494  typename ES::execution_space const& exec_space, View<DT1, VP...>& dest,
1495  Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) {
1496  src.contribute_into(exec_space, dest);
1497 }
1498 
1499 template <typename DT1, typename DT2, typename LY, typename ES, typename OP,
1500  typename CT, typename DP, typename... VP>
1501 void contribute(
1502  View<DT1, VP...>& dest,
1503  Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) {
1504  using execution_space = typename ES::execution_space;
1505  contribute(execution_space{}, dest, src);
1506 }
1507 
1508 } // namespace Experimental
1509 } // namespace Kokkos
1510 
1511 namespace Kokkos {
1512 
1513 template <typename DT, typename LY, typename ES, typename OP, typename CT,
1514  typename DP, typename... IS>
1515 void realloc(
1516  Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1517  IS... is) {
1518  scatter_view.realloc(is...);
1519 }
1520 
1521 template <typename DT, typename LY, typename ES, typename OP, typename CT,
1522  typename DP, typename... IS>
1523 void resize(
1524  Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1525  IS... is) {
1526  scatter_view.resize(is...);
1527 }
1528 
1529 } // namespace Kokkos
1530 
1531 #endif
View
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Execute functor in parallel according to the execution policy.
Class to generate unique ids based on the required amount of concurrency.
View to an array of data.
Memory layout tag indicating left-to-right (Fortran scheme) striding of multi-indices.
Memory layout tag indicating right-to-left (C or lexicographical scheme) striding of multi-indices.
Can AccessSpace access MemorySpace ?