Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_ScatterView.hpp
Go to the documentation of this file.
1/*
2//@HEADER
3// ************************************************************************
4//
5// Kokkos v. 3.0
6// Copyright (2020) National Technology & Engineering
7// Solutions of Sandia, LLC (NTESS).
8//
9// Under the terms of Contract DE-NA0003525 with NTESS,
10// the U.S. Government retains certain rights in this software.
11//
12// Redistribution and use in source and binary forms, with or without
13// modification, are permitted provided that the following conditions are
14// met:
15//
16// 1. Redistributions of source code must retain the above copyright
17// notice, this list of conditions and the following disclaimer.
18//
19// 2. Redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution.
22//
23// 3. Neither the name of the Corporation nor the names of the
24// contributors may be used to endorse or promote products derived from
25// this software without specific prior written permission.
26//
27// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38//
39// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40//
41// ************************************************************************
42//@HEADER
43*/
44
50
51#ifndef KOKKOS_SCATTER_VIEW_HPP
52#define KOKKOS_SCATTER_VIEW_HPP
53
54#include <Kokkos_Core.hpp>
55#include <utility>
56
57namespace Kokkos {
58namespace Experimental {
59
60/*
61 * Reduction Type list
62 * - These corresponds to subset of the reducers in parallel_reduce
63 * - See Implementations of ScatterValue for details.
64 */
65struct ScatterSum {};
66struct ScatterProd {};
67struct ScatterMax {};
68struct ScatterMin {};
69
70struct ScatterNonDuplicated {};
71struct ScatterDuplicated {};
72
73struct ScatterNonAtomic {};
74struct ScatterAtomic {};
75
76} // namespace Experimental
77} // namespace Kokkos
78
79namespace Kokkos {
80namespace Impl {
81namespace Experimental {
82
83template <typename ExecSpace>
84struct DefaultDuplication;
85
86template <typename ExecSpace, typename Duplication>
87struct DefaultContribution;
88
89#ifdef KOKKOS_ENABLE_SERIAL
90template <>
91struct DefaultDuplication<Kokkos::Serial> {
92 using type = Kokkos::Experimental::ScatterNonDuplicated;
93};
94
95template <>
96struct DefaultContribution<Kokkos::Serial,
97 Kokkos::Experimental::ScatterNonDuplicated> {
98 using type = Kokkos::Experimental::ScatterNonAtomic;
99};
100template <>
101struct DefaultContribution<Kokkos::Serial,
102 Kokkos::Experimental::ScatterDuplicated> {
103 using type = Kokkos::Experimental::ScatterNonAtomic;
104};
105#endif
106
107#ifdef KOKKOS_ENABLE_OPENMP
108template <>
109struct DefaultDuplication<Kokkos::OpenMP> {
110 using type = Kokkos::Experimental::ScatterDuplicated;
111};
112template <>
113struct DefaultContribution<Kokkos::OpenMP,
114 Kokkos::Experimental::ScatterNonDuplicated> {
115 using type = Kokkos::Experimental::ScatterAtomic;
116};
117template <>
118struct DefaultContribution<Kokkos::OpenMP,
119 Kokkos::Experimental::ScatterDuplicated> {
120 using type = Kokkos::Experimental::ScatterNonAtomic;
121};
122#endif
123
124#ifdef KOKKOS_ENABLE_OPENMPTARGET
125template <>
126struct DefaultDuplication<Kokkos::Experimental::OpenMPTarget> {
127 using type = Kokkos::Experimental::ScatterNonDuplicated;
128};
129template <>
130struct DefaultContribution<Kokkos::Experimental::OpenMPTarget,
131 Kokkos::Experimental::ScatterNonDuplicated> {
132 using type = Kokkos::Experimental::ScatterAtomic;
133};
134template <>
135struct DefaultContribution<Kokkos::Experimental::OpenMPTarget,
136 Kokkos::Experimental::ScatterDuplicated> {
137 using type = Kokkos::Experimental::ScatterNonAtomic;
138};
139#endif
140
141#ifdef KOKKOS_ENABLE_HPX
142template <>
143struct DefaultDuplication<Kokkos::Experimental::HPX> {
144 using type = Kokkos::Experimental::ScatterDuplicated;
145};
146template <>
147struct DefaultContribution<Kokkos::Experimental::HPX,
148 Kokkos::Experimental::ScatterNonDuplicated> {
149 using type = Kokkos::Experimental::ScatterAtomic;
150};
151template <>
152struct DefaultContribution<Kokkos::Experimental::HPX,
153 Kokkos::Experimental::ScatterDuplicated> {
154 using type = Kokkos::Experimental::ScatterNonAtomic;
155};
156#endif
157
158#ifdef KOKKOS_ENABLE_THREADS
159template <>
160struct DefaultDuplication<Kokkos::Threads> {
161 using type = Kokkos::Experimental::ScatterDuplicated;
162};
163template <>
164struct DefaultContribution<Kokkos::Threads,
165 Kokkos::Experimental::ScatterNonDuplicated> {
166 using type = Kokkos::Experimental::ScatterAtomic;
167};
168template <>
169struct DefaultContribution<Kokkos::Threads,
170 Kokkos::Experimental::ScatterDuplicated> {
171 using type = Kokkos::Experimental::ScatterNonAtomic;
172};
173#endif
174
175#ifdef KOKKOS_ENABLE_CUDA
176template <>
177struct DefaultDuplication<Kokkos::Cuda> {
178 using type = Kokkos::Experimental::ScatterNonDuplicated;
179};
180template <>
181struct DefaultContribution<Kokkos::Cuda,
182 Kokkos::Experimental::ScatterNonDuplicated> {
183 using type = Kokkos::Experimental::ScatterAtomic;
184};
185template <>
186struct DefaultContribution<Kokkos::Cuda,
187 Kokkos::Experimental::ScatterDuplicated> {
188 using type = Kokkos::Experimental::ScatterAtomic;
189};
190#endif
191
192#ifdef KOKKOS_ENABLE_HIP
193template <>
194struct DefaultDuplication<Kokkos::Experimental::HIP> {
195 using type = Kokkos::Experimental::ScatterNonDuplicated;
196};
197template <>
198struct DefaultContribution<Kokkos::Experimental::HIP,
199 Kokkos::Experimental::ScatterNonDuplicated> {
200 using type = Kokkos::Experimental::ScatterAtomic;
201};
202template <>
203struct DefaultContribution<Kokkos::Experimental::HIP,
204 Kokkos::Experimental::ScatterDuplicated> {
205 using type = Kokkos::Experimental::ScatterAtomic;
206};
207#endif
208
209#ifdef KOKKOS_ENABLE_SYCL
210template <>
211struct DefaultDuplication<Kokkos::Experimental::SYCL> {
212 using type = Kokkos::Experimental::ScatterNonDuplicated;
213};
214template <>
215struct DefaultContribution<Kokkos::Experimental::SYCL,
216 Kokkos::Experimental::ScatterNonDuplicated> {
217 using type = Kokkos::Experimental::ScatterAtomic;
218};
219template <>
220struct DefaultContribution<Kokkos::Experimental::SYCL,
221 Kokkos::Experimental::ScatterDuplicated> {
222 using type = Kokkos::Experimental::ScatterAtomic;
223};
224#endif
225
226// FIXME All these scatter values need overhaul:
227// - like should they be copyable at all?
228// - what is the internal handle type
229// - remove join
230// - consistently use the update function in operators
231template <typename ValueType, typename Op, typename DeviceType,
232 typename Contribution>
233struct ScatterValue;
234
235/* ScatterValue <Op=ScatterSum, Contribution=ScatterNonAtomic> is
236 the object returned by the access operator() of ScatterAccess. This class
237 inherits from the Sum<> reducer and it wraps join(dest, src) with convenient
238 operator+=, etc. Note the addition of update(ValueType const& rhs) and
239 reset() so that all reducers can have common functions See ReduceDuplicates
240 and ResetDuplicates ) */
241template <typename ValueType, typename DeviceType>
242struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
243 Kokkos::Experimental::ScatterNonAtomic> {
244 ValueType& value;
245
246 public:
247 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
248 : value(value_in) {}
249 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
250 : value(other.value) {}
251 KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
252 update(rhs);
253 }
254 KOKKOS_FORCEINLINE_FUNCTION void operator++() { update(1); }
255 KOKKOS_FORCEINLINE_FUNCTION void operator++(int) { update(1); }
256 KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
257 update(ValueType(-rhs));
258 }
259 KOKKOS_FORCEINLINE_FUNCTION void operator--() { update(ValueType(-1)); }
260 KOKKOS_FORCEINLINE_FUNCTION void operator--(int) { update(ValueType(-1)); }
261 KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
262 value += rhs;
263 }
264 KOKKOS_FORCEINLINE_FUNCTION void reset() {
265 value = reduction_identity<ValueType>::sum();
266 }
267};
268
269/* ScatterValue <Op=ScatterSum, Contribution=ScatterAtomic> is the
270 object returned by the access operator() of ScatterAccess. This class inherits
271 from the Sum<> reducer, and similar to that returned by an Atomic View, it
272 wraps Kokkos::atomic_add with convenient operator+=, etc. This version also has
273 the update(rhs) and reset() functions. */
274template <typename ValueType, typename DeviceType>
275struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
276 Kokkos::Experimental::ScatterAtomic> {
277 ValueType& value;
278
279 public:
280 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
281 : value(value_in) {}
282
283 KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
284 this->join(value, rhs);
285 }
286 KOKKOS_FORCEINLINE_FUNCTION void operator++() { this->join(value, 1); }
287 KOKKOS_FORCEINLINE_FUNCTION void operator++(int) { this->join(value, 1); }
288 KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
289 this->join(value, ValueType(-rhs));
290 }
291 KOKKOS_FORCEINLINE_FUNCTION void operator--() {
292 this->join(value, ValueType(-1));
293 }
294 KOKKOS_FORCEINLINE_FUNCTION void operator--(int) {
295 this->join(value, ValueType(-1));
296 }
297
298 KOKKOS_INLINE_FUNCTION
299 void join(ValueType& dest, const ValueType& src) const {
300 Kokkos::atomic_add(&dest, src);
301 }
302
303 KOKKOS_INLINE_FUNCTION
304 void join(volatile ValueType& dest, const volatile ValueType& src) const {
305 Kokkos::atomic_add(&dest, src);
306 }
307
308 KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
309 this->join(value, rhs);
310 }
311
312 KOKKOS_FORCEINLINE_FUNCTION void reset() {
313 value = reduction_identity<ValueType>::sum();
314 }
315};
316
317/* ScatterValue <Op=ScatterProd, Contribution=ScatterNonAtomic> is
318 the object returned by the access operator() of ScatterAccess. This class
319 inherits from the Prod<> reducer, and it wraps join(dest, src) with
320 convenient operator*=, etc. Note the addition of update(ValueType const& rhs)
321 and reset() so that all reducers can have common functions See
322 ReduceDuplicates and ResetDuplicates ) */
323template <typename ValueType, typename DeviceType>
324struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
325 Kokkos::Experimental::ScatterNonAtomic> {
326 ValueType& value;
327
328 public:
329 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
330 : value(value_in) {}
331 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
332 : value(other.value) {}
333 KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
334 value *= rhs;
335 }
336 KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
337 value /= rhs;
338 }
339
340 KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
341 value *= rhs;
342 }
343 KOKKOS_FORCEINLINE_FUNCTION void reset() {
344 value = reduction_identity<ValueType>::prod();
345 }
346};
347
348/* ScatterValue <Op=ScatterProd, Contribution=ScatterAtomic> is the
349 object returned by the access operator() of ScatterAccess. This class
350 inherits from the Prod<> reducer, and similar to that returned by an Atomic
351 View, it wraps and atomic_prod with convenient operator*=, etc. atomic_prod
352 uses the atomic_compare_exchange. This version also has the update(rhs)
353 and reset() functions. */
354template <typename ValueType, typename DeviceType>
355struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
356 Kokkos::Experimental::ScatterAtomic> {
357 ValueType& value;
358
359 public:
360 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
361 : value(value_in) {}
362 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
363 : value(other.value) {}
364
365 KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
366 Kokkos::atomic_mul(&value, rhs);
367 }
368 KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
369 Kokkos::atomic_div(&value, rhs);
370 }
371
372 KOKKOS_FORCEINLINE_FUNCTION
373 void atomic_prod(ValueType& dest, const ValueType& src) const {
374 bool success = false;
375 while (!success) {
376 ValueType dest_old = dest;
377 ValueType dest_new = dest_old * src;
378 dest_new =
379 Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
380 success = ((dest_new - dest_old) / dest_old <= 1e-15);
381 }
382 }
383
384 KOKKOS_INLINE_FUNCTION
385 void join(ValueType& dest, const ValueType& src) const {
386 atomic_prod(&dest, src);
387 }
388
389 KOKKOS_INLINE_FUNCTION
390 void join(volatile ValueType& dest, const volatile ValueType& src) const {
391 atomic_prod(&dest, src);
392 }
393
394 KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
395 atomic_prod(&value, rhs);
396 }
397 KOKKOS_FORCEINLINE_FUNCTION void reset() {
398 value = reduction_identity<ValueType>::prod();
399 }
400};
401
402/* ScatterValue <Op=ScatterMin, Contribution=ScatterNonAtomic> is
403 the object returned by the access operator() of ScatterAccess. This class
404 inherits from the Min<> reducer and it wraps join(dest, src) with convenient
405 update(rhs). Note the addition of update(ValueType const& rhs) and reset()
406 are so that all reducers can have a common update function See
407 ReduceDuplicates and ResetDuplicates ) */
408template <typename ValueType, typename DeviceType>
409struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
410 Kokkos::Experimental::ScatterNonAtomic> {
411 ValueType& value;
412 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
413 : value(value_in) {}
414 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
415 : value(other.value) {}
416
417 public:
418 KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
419 value = rhs < value ? rhs : value;
420 }
421 KOKKOS_FORCEINLINE_FUNCTION void reset() {
422 value = reduction_identity<ValueType>::min();
423 }
424};
425
426/* ScatterValue <Op=ScatterMin, Contribution=ScatterAtomic> is the
427 object returned by the access operator() of ScatterAccess. This class
428 inherits from the Min<> reducer, and similar to that returned by an Atomic
429 View, it wraps atomic_min with join(), etc. atomic_min uses the
430 atomic_compare_exchange. This version also has the update(rhs) and reset()
431 functions. */
432template <typename ValueType, typename DeviceType>
433struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
434 Kokkos::Experimental::ScatterAtomic> {
435 ValueType& value;
436
437 public:
438 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
439 : value(value_in) {}
440 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
441 : value(other.value) {}
442
443 KOKKOS_FORCEINLINE_FUNCTION
444 void atomic_min(ValueType& dest, const ValueType& src) const {
445 bool success = false;
446 while (!success) {
447 ValueType dest_old = dest;
448 ValueType dest_new = (dest_old > src) ? src : dest_old;
449 dest_new =
450 Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
451 success = ((dest_new - dest_old) / dest_old <= 1e-15);
452 }
453 }
454
455 KOKKOS_INLINE_FUNCTION
456 void join(ValueType& dest, const ValueType& src) const {
457 atomic_min(dest, src);
458 }
459
460 KOKKOS_INLINE_FUNCTION
461 void join(volatile ValueType& dest, const volatile ValueType& src) const {
462 atomic_min(dest, src);
463 }
464
465 KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
466 this->join(value, rhs);
467 }
468 KOKKOS_FORCEINLINE_FUNCTION void reset() {
469 value = reduction_identity<ValueType>::min();
470 }
471};
472
473/* ScatterValue <Op=ScatterMax, Contribution=ScatterNonAtomic> is
474 the object returned by the access operator() of ScatterAccess. This class
475 inherits from the Max<> reducer and it wraps join(dest, src) with convenient
476 update(rhs). Note the addition of update(ValueType const& rhs) and reset()
477 are so that all reducers can have a common update function See
478 ReduceDuplicates and ResetDuplicates ) */
479template <typename ValueType, typename DeviceType>
480struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
481 Kokkos::Experimental::ScatterNonAtomic> {
482 ValueType& value;
483
484 public:
485 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
486 : value(value_in) {}
487 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
488 : value(other.value) {}
489 KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
490 value = rhs > value ? rhs : value;
491 }
492 KOKKOS_FORCEINLINE_FUNCTION void reset() {
493 value = reduction_identity<ValueType>::max();
494 }
495};
496
497/* ScatterValue <Op=ScatterMax, Contribution=ScatterAtomic> is the
498 object returned by the access operator() of ScatterAccess. This class
499 inherits from the Max<> reducer, and similar to that returned by an Atomic
500 View, it wraps atomic_max with join(), etc. atomic_max uses the
501 atomic_compare_exchange. This version also has the update(rhs) and reset()
502 functions. */
503template <typename ValueType, typename DeviceType>
504struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
505 Kokkos::Experimental::ScatterAtomic> {
506 ValueType& value;
507
508 public:
509 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
510 : value(value_in) {}
511 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
512 : value(other.value) {}
513
514 KOKKOS_FORCEINLINE_FUNCTION
515 void atomic_max(ValueType& dest, const ValueType& src) const {
516 bool success = false;
517 while (!success) {
518 ValueType dest_old = dest;
519 ValueType dest_new = (dest_old < src) ? src : dest_old;
520 dest_new =
521 Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
522 success = ((dest_new - dest_old) / dest_old <= 1e-15);
523 }
524 }
525
526 KOKKOS_INLINE_FUNCTION
527 void join(ValueType& dest, const ValueType& src) const {
528 atomic_max(dest, src);
529 }
530
531 KOKKOS_INLINE_FUNCTION
532 void join(volatile ValueType& dest, const volatile ValueType& src) const {
533 atomic_max(dest, src);
534 }
535
536 KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
537 this->join(value, rhs);
538 }
539 KOKKOS_FORCEINLINE_FUNCTION void reset() {
540 value = reduction_identity<ValueType>::max();
541 }
542};
543
544/* DuplicatedDataType, given a View DataType, will create a new DataType
545 that has a new runtime dimension which becomes the largest-stride dimension.
546 In the case of LayoutLeft, due to the limitation induced by the design of
547 DataType itself, it must convert any existing compile-time dimensions into
548 runtime dimensions. */
549template <typename T, typename Layout>
550struct DuplicatedDataType;
551
552template <typename T>
553struct DuplicatedDataType<T, Kokkos::LayoutRight> {
554 using value_type = T*; // For LayoutRight, add a star all the way on the left
555};
556
557template <typename T, size_t N>
558struct DuplicatedDataType<T[N], Kokkos::LayoutRight> {
559 using value_type =
560 typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type[N];
561};
562
563template <typename T>
564struct DuplicatedDataType<T[], Kokkos::LayoutRight> {
565 using value_type =
566 typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type[];
567};
568
569template <typename T>
570struct DuplicatedDataType<T*, Kokkos::LayoutRight> {
571 using value_type =
572 typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type*;
573};
574
575template <typename T>
576struct DuplicatedDataType<T, Kokkos::LayoutLeft> {
577 using value_type = T*;
578};
579
580template <typename T, size_t N>
581struct DuplicatedDataType<T[N], Kokkos::LayoutLeft> {
582 using value_type =
583 typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
584};
585
586template <typename T>
587struct DuplicatedDataType<T[], Kokkos::LayoutLeft> {
588 using value_type =
589 typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
590};
591
592template <typename T>
593struct DuplicatedDataType<T*, Kokkos::LayoutLeft> {
594 using value_type =
595 typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
596};
597
598/* Insert integer argument pack into array */
599
600template <class T>
601void args_to_array(size_t* array, int pos, T dim0) {
602 array[pos] = dim0;
603}
604template <class T, class... Dims>
605void args_to_array(size_t* array, int pos, T dim0, Dims... dims) {
606 array[pos] = dim0;
607 args_to_array(array, pos + 1, dims...);
608}
609
610/* Slice is just responsible for stuffing the correct number of Kokkos::ALL
611 arguments on the correct side of the index in a call to subview() to get a
612 subview where the index specified is the largest-stride one. */
613template <typename Layout, int rank, typename V, typename... Args>
614struct Slice {
615 using next = Slice<Layout, rank - 1, V, Kokkos::Impl::ALL_t, Args...>;
616 using value_type = typename next::value_type;
617
618 static value_type get(V const& src, const size_t i, Args... args) {
619 return next::get(src, i, Kokkos::ALL, args...);
620 }
621};
622
623template <typename V, typename... Args>
624struct Slice<Kokkos::LayoutRight, 1, V, Args...> {
625 using value_type =
626 typename Kokkos::Impl::ViewMapping<void, V, const size_t, Args...>::type;
627 static value_type get(V const& src, const size_t i, Args... args) {
628 return Kokkos::subview(src, i, args...);
629 }
630};
631
632template <typename V, typename... Args>
633struct Slice<Kokkos::LayoutLeft, 1, V, Args...> {
634 using value_type =
635 typename Kokkos::Impl::ViewMapping<void, V, Args..., const size_t>::type;
636 static value_type get(V const& src, const size_t i, Args... args) {
637 return Kokkos::subview(src, args..., i);
638 }
639};
640
641template <typename ExecSpace, typename ValueType, typename Op>
642struct ReduceDuplicates;
643
644template <typename ExecSpace, typename ValueType, typename Op>
645struct ReduceDuplicatesBase {
646 using Derived = ReduceDuplicates<ExecSpace, ValueType, Op>;
647 ValueType const* src;
648 ValueType* dst;
649 size_t stride;
650 size_t start;
651 size_t n;
652 ReduceDuplicatesBase(ExecSpace const& exec_space, ValueType const* src_in,
653 ValueType* dest_in, size_t stride_in, size_t start_in,
654 size_t n_in, std::string const& name)
655 : src(src_in), dst(dest_in), stride(stride_in), start(start_in), n(n_in) {
657 std::string("Kokkos::ScatterView::ReduceDuplicates [") + name + "]",
658 RangePolicy<ExecSpace, size_t>(exec_space, 0, stride),
659 static_cast<Derived const&>(*this));
660 }
661};
662
663/* ReduceDuplicates -- Perform reduction on destination array using strided
664 * source Use ScatterValue<> specific to operation to wrap destination array so
665 * that the reduction operation can be accessed via the update(rhs) function */
666template <typename ExecSpace, typename ValueType, typename Op>
667struct ReduceDuplicates
668 : public ReduceDuplicatesBase<ExecSpace, ValueType, Op> {
669 using Base = ReduceDuplicatesBase<ExecSpace, ValueType, Op>;
670 ReduceDuplicates(ExecSpace const& exec_space, ValueType const* src_in,
671 ValueType* dst_in, size_t stride_in, size_t start_in,
672 size_t n_in, std::string const& name)
673 : Base(exec_space, src_in, dst_in, stride_in, start_in, n_in, name) {}
674 KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
675 for (size_t j = Base::start; j < Base::n; ++j) {
676 ScatterValue<ValueType, Op, ExecSpace,
677 Kokkos::Experimental::ScatterNonAtomic>
678 sv(Base::dst[i]);
679 sv.update(Base::src[i + Base::stride * j]);
680 }
681 }
682};
683
684template <typename ExecSpace, typename ValueType, typename Op>
685struct ResetDuplicates;
686
687template <typename ExecSpace, typename ValueType, typename Op>
688struct ResetDuplicatesBase {
689 using Derived = ResetDuplicates<ExecSpace, ValueType, Op>;
690 ValueType* data;
691 ResetDuplicatesBase(ExecSpace const& exec_space, ValueType* data_in,
692 size_t size_in, std::string const& name)
693 : data(data_in) {
695 std::string("Kokkos::ScatterView::ResetDuplicates [") + name + "]",
696 RangePolicy<ExecSpace, size_t>(exec_space, 0, size_in),
697 static_cast<Derived const&>(*this));
698 }
699};
700
701/* ResetDuplicates -- Perform reset on destination array
702 * Use ScatterValue<> specific to operation to wrap destination array so that
703 * the reset operation can be accessed via the reset() function */
704template <typename ExecSpace, typename ValueType, typename Op>
705struct ResetDuplicates : public ResetDuplicatesBase<ExecSpace, ValueType, Op> {
706 using Base = ResetDuplicatesBase<ExecSpace, ValueType, Op>;
707 ResetDuplicates(ExecSpace const& exec_space, ValueType* data_in,
708 size_t size_in, std::string const& name)
709 : Base(exec_space, data_in, size_in, name) {}
710 KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
711 ScatterValue<ValueType, Op, ExecSpace,
712 Kokkos::Experimental::ScatterNonAtomic>
713 sv(Base::data[i]);
714 sv.reset();
715 }
716};
717
718template <typename... P>
719void check_scatter_view_allocation_properties_argument(
720 ViewCtorProp<P...> const&) {
721 static_assert(ViewCtorProp<P...>::has_execution_space &&
722 ViewCtorProp<P...>::has_label &&
723 ViewCtorProp<P...>::initialize,
724 "Allocation property must have an execution name as well as a "
725 "label, and must perform the view initialization");
726}
727
728} // namespace Experimental
729} // namespace Impl
730} // namespace Kokkos
731
732namespace Kokkos {
733namespace Experimental {
734
735template <typename DataType,
736 typename Layout = Kokkos::DefaultExecutionSpace::array_layout,
737 typename DeviceType = Kokkos::DefaultExecutionSpace,
738 typename Op = Kokkos::Experimental::ScatterSum,
739 typename Duplication = typename Kokkos::Impl::Experimental::
740 DefaultDuplication<typename DeviceType::execution_space>::type,
741 typename Contribution =
742 typename Kokkos::Impl::Experimental::DefaultContribution<
743 typename DeviceType::execution_space, Duplication>::type>
744class ScatterView;
745
746template <typename DataType, typename Op, typename DeviceType, typename Layout,
747 typename Duplication, typename Contribution,
748 typename OverrideContribution>
749class ScatterAccess;
750
751// non-duplicated implementation
752template <typename DataType, typename Op, typename DeviceType, typename Layout,
753 typename Contribution>
754class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
755 Contribution> {
756 public:
757 using execution_space = typename DeviceType::execution_space;
758 using memory_space = typename DeviceType::memory_space;
760 using original_view_type = Kokkos::View<DataType, Layout, device_type>;
761 using original_value_type = typename original_view_type::value_type;
762 using original_reference_type = typename original_view_type::reference_type;
763 friend class ScatterAccess<DataType, Op, DeviceType, Layout,
764 ScatterNonDuplicated, Contribution,
765 ScatterNonAtomic>;
766 friend class ScatterAccess<DataType, Op, DeviceType, Layout,
767 ScatterNonDuplicated, Contribution, ScatterAtomic>;
768 template <class, class, class, class, class, class>
769 friend class ScatterView;
770
771 ScatterView() = default;
772
773 template <typename RT, typename... RP>
774 ScatterView(View<RT, RP...> const& original_view)
775 : internal_view(original_view) {}
776
777 template <typename RT, typename... P, typename... RP>
778 ScatterView(execution_space const& /* exec_space */,
779 View<RT, RP...> const& original_view)
780 : internal_view(original_view) {}
781
782 template <typename... Dims>
783 ScatterView(std::string const& name, Dims... dims)
784 : internal_view(name, dims...) {}
785
786 // This overload allows specifying an execution space instance to be
787 // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
788 // first argument.
789 template <typename... P, typename... Dims>
790 ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, Dims... dims)
791 : internal_view(arg_prop, dims...) {
792 using ::Kokkos::Impl::Experimental::
793 check_scatter_view_allocation_properties_argument;
794 check_scatter_view_allocation_properties_argument(arg_prop);
795 }
796
797 template <typename OtherDataType, typename OtherDeviceType>
798 KOKKOS_FUNCTION ScatterView(
799 const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
800 ScatterNonDuplicated, Contribution>& other_view)
801 : internal_view(other_view.internal_view) {}
802
803 template <typename OtherDataType, typename OtherDeviceType>
804 KOKKOS_FUNCTION void operator=(
805 const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
806 ScatterNonDuplicated, Contribution>& other_view) {
807 internal_view = other_view.internal_view;
808 }
809
810 template <typename OverrideContribution = Contribution>
811 KOKKOS_FORCEINLINE_FUNCTION
812 ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
813 Contribution, OverrideContribution>
814 access() const {
815 return ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
816 Contribution, OverrideContribution>(*this);
817 }
818
819 original_view_type subview() const { return internal_view; }
820
821 KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
822 return internal_view.is_allocated();
823 }
824
825 template <typename DT, typename... RP>
826 void contribute_into(View<DT, RP...> const& dest) const {
827 contribute_into(execution_space(), dest);
828 }
829
830 template <typename DT, typename... RP>
831 void contribute_into(execution_space const& exec_space,
832 View<DT, RP...> const& dest) const {
833 using dest_type = View<DT, RP...>;
834 static_assert(std::is_same<typename dest_type::array_layout, Layout>::value,
835 "ScatterView contribute destination has different layout");
836 static_assert(
838 execution_space, typename dest_type::memory_space>::accessible,
839 "ScatterView contribute destination memory space not accessible");
840 if (dest.data() == internal_view.data()) return;
841 Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
842 original_value_type, Op>(
843 exec_space, internal_view.data(), dest.data(), 0, 0, 1,
844 internal_view.label());
845 }
846
847 void reset(execution_space const& exec_space = execution_space()) {
848 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
849 original_value_type, Op>(
850 exec_space, internal_view.data(), internal_view.size(),
851 internal_view.label());
852 }
853 template <typename DT, typename... RP>
854 void reset_except(View<DT, RP...> const& view) {
855 reset_except(execution_space(), view);
856 }
857
858 template <typename DT, typename... RP>
859 void reset_except(const execution_space& exec_space,
860 View<DT, RP...> const& view) {
861 if (view.data() != internal_view.data()) reset(exec_space);
862 }
863
864 void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
865 const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
866 const size_t n6 = 0, const size_t n7 = 0) {
867 ::Kokkos::resize(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
868 }
869
870 void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
871 const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
872 const size_t n6 = 0, const size_t n7 = 0) {
873 ::Kokkos::realloc(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
874 }
875
876 protected:
877 template <typename... Args>
878 KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(Args... args) const {
879 return internal_view(args...);
880 }
881
882 private:
883 using internal_view_type = original_view_type;
884 internal_view_type internal_view;
885};
886
887template <typename DataType, typename Op, typename DeviceType, typename Layout,
888 typename Contribution, typename OverrideContribution>
889class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
890 Contribution, OverrideContribution> {
891 public:
892 using view_type = ScatterView<DataType, Layout, DeviceType, Op,
893 ScatterNonDuplicated, Contribution>;
894 using original_value_type = typename view_type::original_value_type;
895 using value_type = Kokkos::Impl::Experimental::ScatterValue<
896 original_value_type, Op, DeviceType, OverrideContribution>;
897
898 KOKKOS_INLINE_FUNCTION
899 ScatterAccess() : view(view_type()) {}
900
901 KOKKOS_INLINE_FUNCTION
902 ScatterAccess(view_type const& view_in) : view(view_in) {}
903 KOKKOS_DEFAULTED_FUNCTION
904 ~ScatterAccess() = default;
905
906 template <typename... Args>
907 KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
908 return view.at(args...);
909 }
910
911 template <typename Arg>
912 KOKKOS_FORCEINLINE_FUNCTION
913 typename std::enable_if<view_type::original_view_type::rank == 1 &&
914 std::is_integral<Arg>::value,
915 value_type>::type
916 operator[](Arg arg) const {
917 return view.at(arg);
918 }
919
920 private:
921 view_type const& view;
922};
923
924// duplicated implementation
925// LayoutLeft and LayoutRight are different enough that we'll just specialize
926// each
927
928template <typename DataType, typename Op, typename DeviceType,
929 typename Contribution>
930class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op,
931 ScatterDuplicated, Contribution> {
932 public:
933 using execution_space = typename DeviceType::execution_space;
934 using memory_space = typename DeviceType::memory_space;
936 using original_view_type =
938 using original_value_type = typename original_view_type::value_type;
939 using original_reference_type = typename original_view_type::reference_type;
940 friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
941 ScatterDuplicated, Contribution, ScatterNonAtomic>;
942 friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
943 ScatterDuplicated, Contribution, ScatterAtomic>;
944 template <class, class, class, class, class, class>
945 friend class ScatterView;
946
947 using data_type_info =
948 typename Kokkos::Impl::Experimental::DuplicatedDataType<
949 DataType, Kokkos::LayoutRight>;
950 using internal_data_type = typename data_type_info::value_type;
951 using internal_view_type =
952 Kokkos::View<internal_data_type, Kokkos::LayoutRight, device_type>;
953
954 ScatterView() = default;
955
956 template <typename OtherDataType, typename OtherDeviceType>
957 KOKKOS_FUNCTION ScatterView(
958 const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
959 ScatterDuplicated, Contribution>& other_view)
960 : unique_token(other_view.unique_token),
961 internal_view(other_view.internal_view) {}
962
963 template <typename OtherDataType, typename OtherDeviceType>
964 KOKKOS_FUNCTION void operator=(
965 const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
966 ScatterDuplicated, Contribution>& other_view) {
967 unique_token = other_view.unique_token;
968 internal_view = other_view.internal_view;
969 }
970
971 template <typename RT, typename... RP>
972 ScatterView(View<RT, RP...> const& original_view)
973 : ScatterView(execution_space(), original_view) {}
974
975 template <typename RT, typename... P, typename... RP>
976 ScatterView(execution_space const& exec_space,
977 View<RT, RP...> const& original_view)
978 : unique_token(),
979 internal_view(
980 view_alloc(WithoutInitializing,
981 std::string("duplicated_") + original_view.label(),
982 exec_space),
983 unique_token.size(),
984 original_view.rank_dynamic > 0 ? original_view.extent(0)
985 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
986 original_view.rank_dynamic > 1 ? original_view.extent(1)
987 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
988 original_view.rank_dynamic > 2 ? original_view.extent(2)
989 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
990 original_view.rank_dynamic > 3 ? original_view.extent(3)
991 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
992 original_view.rank_dynamic > 4 ? original_view.extent(4)
993 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
994 original_view.rank_dynamic > 5 ? original_view.extent(5)
995 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
996 original_view.rank_dynamic > 6 ? original_view.extent(6)
997 : KOKKOS_IMPL_CTOR_DEFAULT_ARG)
998
999 {
1000 reset(exec_space);
1001 }
1002
1003 template <typename... Dims>
1004 ScatterView(std::string const& name, Dims... dims)
1005 : ScatterView(view_alloc(execution_space(), name), dims...) {}
1006
1007 // This overload allows specifying an execution space instance to be
1008 // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
1009 // first argument.
1010 template <typename... P, typename... Dims>
1011 ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, Dims... dims)
1012 : internal_view(view_alloc(WithoutInitializing,
1013 static_cast<::Kokkos::Impl::ViewCtorProp<
1014 void, std::string> const&>(arg_prop)
1015 .value),
1016 unique_token.size(), dims...) {
1017 using ::Kokkos::Impl::Experimental::
1018 check_scatter_view_allocation_properties_argument;
1019 check_scatter_view_allocation_properties_argument(arg_prop);
1020
1021 auto const exec_space =
1022 static_cast<::Kokkos::Impl::ViewCtorProp<void, execution_space> const&>(
1023 arg_prop)
1024 .value;
1025 reset(exec_space);
1026 }
1027
1028 template <typename OverrideContribution = Contribution>
1029 KOKKOS_FORCEINLINE_FUNCTION
1030 ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
1031 ScatterDuplicated, Contribution, OverrideContribution>
1032 access() const {
1033 return ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
1034 ScatterDuplicated, Contribution, OverrideContribution>(
1035 *this);
1036 }
1037
1038 typename Kokkos::Impl::Experimental::Slice<Kokkos::LayoutRight,
1039 internal_view_type::rank,
1040 internal_view_type>::value_type
1041 subview() const {
1042 return Kokkos::Impl::Experimental::Slice<
1043 Kokkos::LayoutRight, internal_view_type::Rank,
1044 internal_view_type>::get(internal_view, 0);
1045 }
1046
1047 KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
1048 return internal_view.is_allocated();
1049 }
1050
1051 template <typename DT, typename... RP>
1052 void contribute_into(View<DT, RP...> const& dest) const {
1053 contribute_into(execution_space(), dest);
1054 }
1055
1056 template <typename DT, typename... RP>
1057 void contribute_into(execution_space const& exec_space,
1058 View<DT, RP...> const& dest) const {
1059 using dest_type = View<DT, RP...>;
1060 static_assert(std::is_same<typename dest_type::array_layout,
1061 Kokkos::LayoutRight>::value,
1062 "ScatterView deep_copy destination has different layout");
1063 static_assert(
1065 execution_space, typename dest_type::memory_space>::accessible,
1066 "ScatterView deep_copy destination memory space not accessible");
1067 bool is_equal = (dest.data() == internal_view.data());
1068 size_t start = is_equal ? 1 : 0;
1069 Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
1070 original_value_type, Op>(
1071 exec_space, internal_view.data(), dest.data(), internal_view.stride(0),
1072 start, internal_view.extent(0), internal_view.label());
1073 }
1074
1075 void reset(execution_space const& exec_space = execution_space()) {
1076 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1077 original_value_type, Op>(
1078 exec_space, internal_view.data(), internal_view.size(),
1079 internal_view.label());
1080 }
1081
1082 template <typename DT, typename... RP>
1083 void reset_except(View<DT, RP...> const& view) {
1084 reset_except(execution_space(), view);
1085 }
1086
1087 template <typename DT, typename... RP>
1088 void reset_except(execution_space const& exec_space,
1089 View<DT, RP...> const& view) {
1090 if (view.data() != internal_view.data()) {
1091 reset(exec_space);
1092 return;
1093 }
1094 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1095 original_value_type, Op>(
1096 exec_space, internal_view.data() + view.size(),
1097 internal_view.size() - view.size(), internal_view.label());
1098 }
1099
1100 void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1101 const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1102 const size_t n6 = 0) {
1103 ::Kokkos::resize(internal_view, unique_token.size(), n0, n1, n2, n3, n4, n5,
1104 n6);
1105 }
1106
1107 void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1108 const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1109 const size_t n6 = 0) {
1110 ::Kokkos::realloc(internal_view, unique_token.size(), n0, n1, n2, n3, n4,
1111 n5, n6);
1112 }
1113
1114 protected:
1115 template <typename... Args>
1116 KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int rank,
1117 Args... args) const {
1118 return internal_view(rank, args...);
1119 }
1120
1121 protected:
1122 using unique_token_type = Kokkos::Experimental::UniqueToken<
1123 execution_space, Kokkos::Experimental::UniqueTokenScope::Global>;
1124
1125 unique_token_type unique_token;
1126 internal_view_type internal_view;
1127};
1128
1129template <typename DataType, typename Op, typename DeviceType,
1130 typename Contribution>
1131class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op,
1132 ScatterDuplicated, Contribution> {
1133 public:
1134 using execution_space = typename DeviceType::execution_space;
1135 using memory_space = typename DeviceType::memory_space;
1137 using original_view_type =
1139 using original_value_type = typename original_view_type::value_type;
1140 using original_reference_type = typename original_view_type::reference_type;
1141 friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1142 ScatterDuplicated, Contribution, ScatterNonAtomic>;
1143 friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1144 ScatterDuplicated, Contribution, ScatterAtomic>;
1145 template <class, class, class, class, class, class>
1146 friend class ScatterView;
1147
1148 using data_type_info =
1149 typename Kokkos::Impl::Experimental::DuplicatedDataType<
1150 DataType, Kokkos::LayoutLeft>;
1151 using internal_data_type = typename data_type_info::value_type;
1152 using internal_view_type =
1153 Kokkos::View<internal_data_type, Kokkos::LayoutLeft, device_type>;
1154
1155 ScatterView() = default;
1156
1157 template <typename RT, typename... RP>
1158 ScatterView(View<RT, RP...> const& original_view)
1159 : ScatterView(execution_space(), original_view) {}
1160
1161 template <typename RT, typename... P, typename... RP>
1162 ScatterView(execution_space const& exec_space,
1163 View<RT, RP...> const& original_view)
1164 : unique_token() {
1165 size_t arg_N[8] = {original_view.rank > 0 ? original_view.extent(0)
1166 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1167 original_view.rank > 1 ? original_view.extent(1)
1168 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1169 original_view.rank > 2 ? original_view.extent(2)
1170 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1171 original_view.rank > 3 ? original_view.extent(3)
1172 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1173 original_view.rank > 4 ? original_view.extent(4)
1174 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1175 original_view.rank > 5 ? original_view.extent(5)
1176 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1177 original_view.rank > 6 ? original_view.extent(6)
1178 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1179 KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1180 arg_N[internal_view_type::rank - 1] = unique_token.size();
1181 internal_view = internal_view_type(
1182 view_alloc(WithoutInitializing,
1183 std::string("duplicated_") + original_view.label(),
1184 exec_space),
1185 arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6],
1186 arg_N[7]);
1187 reset(exec_space);
1188 }
1189
1190 template <typename... Dims>
1191 ScatterView(std::string const& name, Dims... dims)
1192 : ScatterView(view_alloc(execution_space(), name), dims...) {}
1193
1194 // This overload allows specifying an execution space instance to be
1195 // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as
1196 // first argument.
1197 template <typename... P, typename... Dims>
1198 ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop,
1199 Dims... dims) {
1200 using ::Kokkos::Impl::Experimental::
1201 check_scatter_view_allocation_properties_argument;
1202 check_scatter_view_allocation_properties_argument(arg_prop);
1203
1204 original_view_type original_view;
1205 size_t arg_N[8] = {original_view.rank > 0 ? original_view.static_extent(0)
1206 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1207 original_view.rank > 1 ? original_view.static_extent(1)
1208 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1209 original_view.rank > 2 ? original_view.static_extent(2)
1210 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1211 original_view.rank > 3 ? original_view.static_extent(3)
1212 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1213 original_view.rank > 4 ? original_view.static_extent(4)
1214 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1215 original_view.rank > 5 ? original_view.static_extent(5)
1216 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1217 original_view.rank > 6 ? original_view.static_extent(6)
1218 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1219 KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1220 Kokkos::Impl::Experimental::args_to_array(arg_N, 0, dims...);
1221 arg_N[internal_view_type::rank - 1] = unique_token.size();
1222
1223 auto const name =
1224 static_cast<::Kokkos::Impl::ViewCtorProp<void, std::string> const&>(
1225 arg_prop)
1226 .value;
1227 internal_view = internal_view_type(view_alloc(WithoutInitializing, name),
1228 arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1229 arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1230
1231 auto const exec_space =
1232 static_cast<::Kokkos::Impl::ViewCtorProp<void, execution_space> const&>(
1233 arg_prop)
1234 .value;
1235 reset(exec_space);
1236 }
1237
1238 template <typename OtherDataType, typename OtherDeviceType>
1239 KOKKOS_FUNCTION ScatterView(
1240 const ScatterView<OtherDataType, Kokkos::LayoutLeft, OtherDeviceType, Op,
1241 ScatterDuplicated, Contribution>& other_view)
1242 : unique_token(other_view.unique_token),
1243 internal_view(other_view.internal_view) {}
1244
1245 template <typename OtherDataType, typename OtherDeviceType>
1246 KOKKOS_FUNCTION void operator=(
1247 const ScatterView<OtherDataType, Kokkos::LayoutLeft, OtherDeviceType, Op,
1248 ScatterDuplicated, Contribution>& other_view) {
1249 unique_token = other_view.unique_token;
1250 internal_view = other_view.internal_view;
1251 }
1252
1253 template <typename OverrideContribution = Contribution>
1254 KOKKOS_FORCEINLINE_FUNCTION
1255 ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1256 ScatterDuplicated, Contribution, OverrideContribution>
1257 access() const {
1258 return ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1259 ScatterDuplicated, Contribution, OverrideContribution>(
1260 *this);
1261 }
1262
1263 typename Kokkos::Impl::Experimental::Slice<Kokkos::LayoutLeft,
1264 internal_view_type::rank,
1265 internal_view_type>::value_type
1266 subview() const {
1267 return Kokkos::Impl::Experimental::Slice<
1268 Kokkos::LayoutLeft, internal_view_type::rank,
1269 internal_view_type>::get(internal_view, 0);
1270 }
1271
1272 KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
1273 return internal_view.is_allocated();
1274 }
1275
1276 template <typename... RP>
1277 void contribute_into(View<RP...> const& dest) const {
1278 contribute_into(execution_space(), dest);
1279 }
1280
1281 template <typename... RP>
1282 void contribute_into(execution_space const& exec_space,
1283 View<RP...> const& dest) const {
1284 using dest_type = View<RP...>;
1285 static_assert(
1286 std::is_same<typename dest_type::value_type,
1287 typename original_view_type::non_const_value_type>::value,
1288 "ScatterView deep_copy destination has wrong value_type");
1289 static_assert(std::is_same<typename dest_type::array_layout,
1290 Kokkos::LayoutLeft>::value,
1291 "ScatterView deep_copy destination has different layout");
1292 static_assert(
1294 execution_space, typename dest_type::memory_space>::accessible,
1295 "ScatterView deep_copy destination memory space not accessible");
1296 auto extent = internal_view.extent(internal_view_type::rank - 1);
1297 bool is_equal = (dest.data() == internal_view.data());
1298 size_t start = is_equal ? 1 : 0;
1299 Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
1300 original_value_type, Op>(
1301 exec_space, internal_view.data(), dest.data(),
1302 internal_view.stride(internal_view_type::rank - 1), start, extent,
1303 internal_view.label());
1304 }
1305
1306 void reset(execution_space const& exec_space = execution_space()) {
1307 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1308 original_value_type, Op>(
1309 exec_space, internal_view.data(), internal_view.size(),
1310 internal_view.label());
1311 }
1312
1313 template <typename DT, typename... RP>
1314 void reset_except(View<DT, RP...> const& view) {
1315 reset_except(execution_space(), view);
1316 }
1317
1318 template <typename DT, typename... RP>
1319 void reset_except(execution_space const& exec_space,
1320 View<DT, RP...> const& view) {
1321 if (view.data() != internal_view.data()) {
1322 reset(exec_space);
1323 return;
1324 }
1325 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1326 original_value_type, Op>(
1327 exec_space, internal_view.data() + view.size(),
1328 internal_view.size() - view.size(), internal_view.label());
1329 }
1330
1331 void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1332 const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1333 const size_t n6 = 0) {
1334 size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1335 const int i = internal_view.rank - 1;
1336 arg_N[i] = unique_token.size();
1337
1338 ::Kokkos::resize(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1339 arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1340 }
1341
1342 void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1343 const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1344 const size_t n6 = 0) {
1345 size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1346 const int i = internal_view.rank - 1;
1347 arg_N[i] = unique_token.size();
1348
1349 ::Kokkos::realloc(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1350 arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1351 }
1352
1353 protected:
1354 template <typename... Args>
1355 KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int thread_id,
1356 Args... args) const {
1357 return internal_view(args..., thread_id);
1358 }
1359
1360 protected:
1361 using unique_token_type = Kokkos::Experimental::UniqueToken<
1362 execution_space, Kokkos::Experimental::UniqueTokenScope::Global>;
1363
1364 unique_token_type unique_token;
1365 internal_view_type internal_view;
1366};
1367
1368/* This object has to be separate in order to store the thread ID, which cannot
1369 be obtained until one is inside a parallel construct, and may be relatively
1370 expensive to obtain at every contribution
1371 (calls a non-inlined function, looks up a thread-local variable).
1372 Due to the expense, it is sensible to query it at most once per parallel
1373 iterate (ideally once per thread, but parallel_for doesn't expose that) and
1374 then store it in a stack variable.
1375 ScatterAccess serves as a non-const object on the stack which can store the
1376 thread ID */
1377
1378template <typename DataType, typename Op, typename DeviceType, typename Layout,
1379 typename Contribution, typename OverrideContribution>
1380class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterDuplicated,
1381 Contribution, OverrideContribution> {
1382 public:
1383 using view_type = ScatterView<DataType, Layout, DeviceType, Op,
1384 ScatterDuplicated, Contribution>;
1385 using original_value_type = typename view_type::original_value_type;
1386 using value_type = Kokkos::Impl::Experimental::ScatterValue<
1387 original_value_type, Op, DeviceType, OverrideContribution>;
1388
1389 KOKKOS_FORCEINLINE_FUNCTION
1390 ScatterAccess(view_type const& view_in)
1391 : view(view_in), thread_id(view_in.unique_token.acquire()) {}
1392
1393 KOKKOS_FORCEINLINE_FUNCTION
1394 ~ScatterAccess() {
1395 if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id);
1396 }
1397
1398 template <typename... Args>
1399 KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
1400 return view.at(thread_id, args...);
1401 }
1402
1403 template <typename Arg>
1404 KOKKOS_FORCEINLINE_FUNCTION
1405 typename std::enable_if<view_type::original_view_type::rank == 1 &&
1406 std::is_integral<Arg>::value,
1407 value_type>::type
1408 operator[](Arg arg) const {
1409 return view.at(thread_id, arg);
1410 }
1411
1412 private:
1413 view_type const& view;
1414
1415 // simplify RAII by disallowing copies
1416 ScatterAccess(ScatterAccess const& other) = delete;
1417 ScatterAccess& operator=(ScatterAccess const& other) = delete;
1418 ScatterAccess& operator=(ScatterAccess&& other) = delete;
1419
1420 public:
1421 // do need to allow moves though, for the common
1422 // auto b = a.access();
1423 // that assignments turns into a move constructor call
1424 KOKKOS_FORCEINLINE_FUNCTION
1425 ScatterAccess(ScatterAccess&& other)
1426 : view(other.view), thread_id(other.thread_id) {
1427 other.thread_id = ~thread_id_type(0);
1428 }
1429
1430 private:
1431 using unique_token_type = typename view_type::unique_token_type;
1432 using thread_id_type = typename unique_token_type::size_type;
1433 thread_id_type thread_id;
1434};
1435
1436template <typename Op = Kokkos::Experimental::ScatterSum,
1437 typename Duplication = void, typename Contribution = void,
1438 typename RT, typename... RP>
1439ScatterView<
1440 RT, typename ViewTraits<RT, RP...>::array_layout,
1441 typename ViewTraits<RT, RP...>::device_type, Op,
1442 std::conditional_t<
1443 std::is_same<Duplication, void>::value,
1444 typename Kokkos::Impl::Experimental::DefaultDuplication<
1445 typename ViewTraits<RT, RP...>::execution_space>::type,
1446 Duplication>,
1447 std::conditional_t<
1448 std::is_same<Contribution, void>::value,
1449 typename Kokkos::Impl::Experimental::DefaultContribution<
1450 typename ViewTraits<RT, RP...>::execution_space,
1451 typename std::conditional_t<
1452 std::is_same<Duplication, void>::value,
1453 typename Kokkos::Impl::Experimental::DefaultDuplication<
1454 typename ViewTraits<RT, RP...>::execution_space>::type,
1455 Duplication>>::type,
1456 Contribution>>
1457create_scatter_view(View<RT, RP...> const& original_view) {
1458 return original_view; // implicit ScatterView constructor call
1459}
1460
1461template <typename Op, typename RT, typename... RP>
1462ScatterView<
1463 RT, typename ViewTraits<RT, RP...>::array_layout,
1464 typename ViewTraits<RT, RP...>::device_type, Op,
1465 typename Kokkos::Impl::Experimental::DefaultDuplication<
1466 typename ViewTraits<RT, RP...>::execution_space>::type,
1467 typename Kokkos::Impl::Experimental::DefaultContribution<
1468 typename ViewTraits<RT, RP...>::execution_space,
1469 typename Kokkos::Impl::Experimental::DefaultDuplication<
1470 typename ViewTraits<RT, RP...>::execution_space>::type>::type>
1471create_scatter_view(Op, View<RT, RP...> const& original_view) {
1472 return original_view; // implicit ScatterView constructor call
1473}
1474
1475template <typename Op, typename Duplication, typename Contribution, typename RT,
1476 typename... RP>
1477ScatterView<RT, typename ViewTraits<RT, RP...>::array_layout,
1478 typename ViewTraits<RT, RP...>::device_type, Op, Duplication,
1479 Contribution>
1480create_scatter_view(Op, Duplication, Contribution,
1481 View<RT, RP...> const& original_view) {
1482 return original_view; // implicit ScatterView constructor call
1483}
1484
1485} // namespace Experimental
1486} // namespace Kokkos
1487
1488namespace Kokkos {
1489namespace Experimental {
1490
1491template <typename DT1, typename DT2, typename LY, typename ES, typename OP,
1492 typename CT, typename DP, typename... VP>
1493void contribute(
1494 typename ES::execution_space const& exec_space, View<DT1, VP...>& dest,
1495 Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) {
1496 src.contribute_into(exec_space, dest);
1497}
1498
1499template <typename DT1, typename DT2, typename LY, typename ES, typename OP,
1500 typename CT, typename DP, typename... VP>
1501void contribute(
1502 View<DT1, VP...>& dest,
1503 Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) {
1504 using execution_space = typename ES::execution_space;
1505 contribute(execution_space{}, dest, src);
1506}
1507
1508} // namespace Experimental
1509} // namespace Kokkos
1510
1511namespace Kokkos {
1512
1513template <typename DT, typename LY, typename ES, typename OP, typename CT,
1514 typename DP, typename... IS>
1515void realloc(
1516 Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1517 IS... is) {
1518 scatter_view.realloc(is...);
1519}
1520
1521template <typename DT, typename LY, typename ES, typename OP, typename CT,
1522 typename DP, typename... IS>
1523void resize(
1524 Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1525 IS... is) {
1526 scatter_view.resize(is...);
1527}
1528
1529} // namespace Kokkos
1530
1531#endif
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Execute functor in parallel according to the execution policy.
A thread safe view to a bitset.
class to generate unique ids base on the required amount of concurrency
View to an array of data.
Memory layout tag indicating left-to-right (Fortran scheme) striding of multi-indices.
Memory layout tag indicating right-to-left (C or lexigraphical scheme) striding of multi-indices.
Can AccessSpace access MemorySpace ?