Tpetra parallel linear algebra  Version of the Day
Tpetra_Experimental_BlockView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
44 
52 
53 #include "TpetraCore_config.h"
54 #include "Kokkos_ArithTraits.hpp"
55 #include "Kokkos_Complex.hpp"
56 
57 namespace Tpetra {
58 
70 namespace Experimental {
71 
72 namespace Impl {
73 
80 template<class ViewType1,
81  class ViewType2,
82  const int rank1 = ViewType1::rank>
83 struct AbsMax {
84  static KOKKOS_INLINE_FUNCTION void
85  run (const ViewType2& Y, const ViewType1& X);
86 };
87 
92 template<class ViewType1,
93  class ViewType2>
94 struct AbsMax<ViewType1, ViewType2, 2> {
97  static KOKKOS_INLINE_FUNCTION void
98  run (const ViewType2& Y, const ViewType1& X)
99  {
100  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
102  typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
103  static_assert(! std::is_const<STY>::value,
104  "AbsMax: The type of each entry of Y must be nonconst.");
105  typedef typename std::decay<decltype (X(0,0)) >::type STX;
106  static_assert( std::is_same<STX, STY>::value,
107  "AbsMax: The type of each entry of X and Y must be the same.");
108  typedef Kokkos::Details::ArithTraits<STY> KAT;
109 
110  const int numCols = Y.dimension_1 ();
111  const int numRows = Y.dimension_0 ();
112  for (int j = 0; j < numCols; ++j) {
113  for (int i = 0; i < numRows; ++i) {
114  STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
115  const STX X_ij = X(i,j);
116  // NOTE: no std::max (not a CUDA __device__ function); must
117  // cast back up to complex.
118  const auto Y_ij_abs = KAT::abs (Y_ij);
119  const auto X_ij_abs = KAT::abs (X_ij);
120  Y_ij = (Y_ij_abs >= X_ij_abs) ?
121  static_cast<STY> (Y_ij_abs) :
122  static_cast<STY> (X_ij_abs);
123  }
124  }
125  }
126 };
127 
132 template<class ViewType1,
133  class ViewType2>
134 struct AbsMax<ViewType1, ViewType2, 1> {
137  static KOKKOS_INLINE_FUNCTION void
138  run (const ViewType2& Y, const ViewType1& X)
139  {
140  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
141  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
142 
143  typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
144  static_assert(! std::is_const<STY>::value,
145  "AbsMax: The type of each entry of Y must be nonconst.");
146  typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
147  static_assert( std::is_same<STX, STY>::value,
148  "AbsMax: The type of each entry of X and Y must be the same.");
149  typedef Kokkos::Details::ArithTraits<STY> KAT;
150 
151  const int numRows = Y.dimension_0 ();
152  for (int i = 0; i < numRows; ++i) {
153  STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
154  const STX X_i = X(i);
155  // NOTE: no std::max (not a CUDA __device__ function); must
156  // cast back up to complex.
157  const auto Y_i_abs = KAT::abs (Y_i);
158  const auto X_i_abs = KAT::abs (X_i);
159  Y_i = (Y_i_abs >= X_i_abs) ?
160  static_cast<STY> (Y_i_abs) :
161  static_cast<STY> (X_i_abs);
162  }
163  }
164 };
165 
172 template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
173 KOKKOS_INLINE_FUNCTION void
174 absMax (const ViewType2& Y, const ViewType1& X)
175 {
176  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
177  "absMax: ViewType1 and ViewType2 must have the same rank.");
179 }
180 
185 template<class ViewType,
186  class CoefficientType,
187  class LayoutType = typename ViewType::array_layout,
188  class IndexType = int,
189  const int rank = ViewType::rank>
190 struct SCAL {
191  static KOKKOS_INLINE_FUNCTION void
192  run (const CoefficientType& alpha, const ViewType& x);
193 };
194 
197 template<class ViewType,
198  class CoefficientType,
199  class LayoutType,
200  class IndexType>
201 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
203  static KOKKOS_INLINE_FUNCTION void
204  run (const CoefficientType& alpha, const ViewType& x)
205  {
206  const IndexType numRows = static_cast<IndexType> (x.dimension_0 ());
207  // BLAS _SCAL doesn't check whether alpha is 0.
208  for (IndexType i = 0; i < numRows; ++i) {
209  x(i) = alpha * x(i);
210  }
211  }
212 };
213 
216 template<class ViewType,
217  class CoefficientType,
218  class LayoutType,
219  class IndexType>
220 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
222  static KOKKOS_INLINE_FUNCTION void
223  run (const CoefficientType& alpha, const ViewType& A)
224  {
225  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
226  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
227 
228  // BLAS _SCAL doesn't check whether alpha is 0.
229  for (IndexType i = 0; i < numRows; ++i) {
230  for (IndexType j = 0; j < numCols; ++j) {
231  A(i,j) = alpha * A(i,j);
232  }
233  }
234  }
235 };
236 
242 template<class ViewType,
243  class CoefficientType,
244  class IndexType>
245 struct SCAL<ViewType, CoefficientType, Kokkos::LayoutRight, IndexType, 2> {
247  static KOKKOS_INLINE_FUNCTION void
248  run (const CoefficientType& alpha, const ViewType& A)
249  {
250  const IndexType N = A.size ();
251  typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
252  scalar_type* const A_raw = A.ptr_on_device ();
253 
254  for (IndexType i = 0; i < N; ++i) {
255  A_raw[i] = alpha * A_raw[i];
256  }
257  }
258 };
259 
260 
265 template<class ViewType,
266  class InputType,
267  class LayoutType = typename ViewType::array_layout,
268  class IndexType = int,
269  const int rank = ViewType::rank>
270 struct FILL {
271  static KOKKOS_INLINE_FUNCTION void
272  run (const ViewType& x, const InputType& val);
273 };
274 
277 template<class ViewType,
278  class InputType,
279  class LayoutType,
280  class IndexType>
281 struct FILL<ViewType, InputType, LayoutType, IndexType, 1> {
282  static KOKKOS_INLINE_FUNCTION void
283  run (const ViewType& x, const InputType& val)
284  {
285  const IndexType numRows = static_cast<IndexType> (x.dimension_0 ());
286  for (IndexType i = 0; i < numRows; ++i) {
287  x(i) = val;
288  }
289  }
290 };
291 
294 template<class ViewType,
295  class InputType,
296  class LayoutType,
297  class IndexType>
298 struct FILL<ViewType, InputType, LayoutType, IndexType, 2> {
299  static KOKKOS_INLINE_FUNCTION void
300  run (const ViewType& X, const InputType& val)
301  {
302  const IndexType numRows = static_cast<IndexType> (X.dimension_0 ());
303  const IndexType numCols = static_cast<IndexType> (X.dimension_1 ());
304  for (IndexType j = 0; j < numCols; ++j) {
305  for (IndexType i = 0; i < numRows; ++i) {
306  X(i,j) = val;
307  }
308  }
309  }
310 };
311 
316 template<class CoefficientType,
317  class ViewType1,
318  class ViewType2,
319  class LayoutType1 = typename ViewType1::array_layout,
320  class LayoutType2 = typename ViewType2::array_layout,
321  class IndexType = int,
322  const int rank = ViewType1::rank>
323 struct AXPY {
324  static KOKKOS_INLINE_FUNCTION void
325  run (const CoefficientType& alpha,
326  const ViewType1& x,
327  const ViewType2& y);
328 };
329 
332 template<class CoefficientType,
333  class ViewType1,
334  class ViewType2,
335  class LayoutType1,
336  class LayoutType2,
337  class IndexType>
338 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
340  static KOKKOS_INLINE_FUNCTION void
341  run (const CoefficientType& alpha,
342  const ViewType1& x,
343  const ViewType2& y)
344  {
345  using Kokkos::Details::ArithTraits;
346  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
347  "AXPY: x and y must have the same rank.");
348 
349  const IndexType numRows = static_cast<IndexType> (y.dimension_0 ());
350  if (alpha != ArithTraits<CoefficientType>::zero ()) {
351  for (IndexType i = 0; i < numRows; ++i) {
352  y(i) += alpha * x(i);
353  }
354  }
355  }
356 };
357 
360 template<class CoefficientType,
361  class ViewType1,
362  class ViewType2,
363  class LayoutType1,
364  class LayoutType2,
365  class IndexType>
366 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
368  static KOKKOS_INLINE_FUNCTION void
369  run (const CoefficientType& alpha,
370  const ViewType1& X,
371  const ViewType2& Y)
372  {
373  using Kokkos::Details::ArithTraits;
374  static_assert (ViewType1::rank == ViewType2::rank,
375  "AXPY: X and Y must have the same rank.");
376  const IndexType numRows = static_cast<IndexType> (Y.dimension_0 ());
377  const IndexType numCols = static_cast<IndexType> (Y.dimension_1 ());
378 
379  if (alpha != ArithTraits<CoefficientType>::zero ()) {
380  for (IndexType i = 0; i < numRows; ++i) {
381  for (IndexType j = 0; j < numCols; ++j) {
382  Y(i,j) += alpha * X(i,j);
383  }
384  }
385  }
386  }
387 };
388 
392 template<class CoefficientType,
393  class ViewType1,
394  class ViewType2,
395  class IndexType>
396 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
398  static KOKKOS_INLINE_FUNCTION void
399  run (const CoefficientType& alpha,
400  const ViewType1& X,
401  const ViewType2& Y)
402  {
403  using Kokkos::Details::ArithTraits;
404  static_assert (static_cast<int> (ViewType1::rank) ==
405  static_cast<int> (ViewType2::rank),
406  "AXPY: X and Y must have the same rank.");
407  typedef typename std::decay<decltype (X(0,0)) >::type SX;
408  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
409 
410  const IndexType N = static_cast<IndexType> (Y.size ());
411  const SX* const X_raw = X.ptr_on_device ();
412  SY* const Y_raw = Y.ptr_on_device ();
413 
414  if (alpha != ArithTraits<CoefficientType>::zero ()) {
415  for (IndexType i = 0; i < N; ++i) {
416  Y_raw[i] += alpha * X_raw[i];
417  }
418  }
419  }
420 };
421 
425 template<class CoefficientType,
426  class ViewType1,
427  class ViewType2,
428  class IndexType>
429 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
431  static KOKKOS_INLINE_FUNCTION void
432  run (const CoefficientType& alpha,
433  const ViewType1& X,
434  const ViewType2& Y)
435  {
436  using Kokkos::Details::ArithTraits;
437  static_assert (ViewType1::rank == ViewType2::rank,
438  "AXPY: X and Y must have the same rank.");
439  typedef typename std::decay<decltype (X(0,0)) >::type SX;
440  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
441 
442  const IndexType N = static_cast<IndexType> (Y.size ());
443  const SX* const X_raw = X.ptr_on_device ();
444  SY* const Y_raw = Y.ptr_on_device ();
445 
446  if (alpha != ArithTraits<CoefficientType>::zero ()) {
447  for (IndexType i = 0; i < N; ++i) {
448  Y_raw[i] += alpha * X_raw[i];
449  }
450  }
451  }
452 };
453 
458 template<class ViewType1,
459  class ViewType2,
460  class LayoutType1 = typename ViewType1::array_layout,
461  class LayoutType2 = typename ViewType2::array_layout,
462  class IndexType = int,
463  const int rank = ViewType1::rank>
464 struct COPY {
465  static KOKKOS_INLINE_FUNCTION void
466  run (const ViewType1& x, const ViewType2& y);
467 };
468 
471 template<class ViewType1,
472  class ViewType2,
473  class LayoutType1,
474  class LayoutType2,
475  class IndexType>
476 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
478  static KOKKOS_INLINE_FUNCTION void
479  run (const ViewType1& x, const ViewType2& y)
480  {
481  const IndexType numRows = static_cast<IndexType> (x.dimension_0 ());
482  for (IndexType i = 0; i < numRows; ++i) {
483  y(i) = x(i);
484  }
485  }
486 };
487 
490 template<class ViewType1,
491  class ViewType2,
492  class LayoutType1,
493  class LayoutType2,
494  class IndexType>
495 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
497  static KOKKOS_INLINE_FUNCTION void
498  run (const ViewType1& X, const ViewType2& Y)
499  {
500  const IndexType numRows = static_cast<IndexType> (Y.dimension_0 ());
501  const IndexType numCols = static_cast<IndexType> (Y.dimension_1 ());
502 
503  // BLAS _SCAL doesn't check whether alpha is 0.
504  for (IndexType i = 0; i < numRows; ++i) {
505  for (IndexType j = 0; j < numCols; ++j) {
506  Y(i,j) = X(i,j);
507  }
508  }
509  }
510 };
511 
515 template<class ViewType1,
516  class ViewType2,
517  class IndexType>
518 struct COPY<ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
520  static KOKKOS_INLINE_FUNCTION void
521  run (const ViewType1& X, const ViewType2& Y)
522  {
523  typedef typename std::decay<decltype (X(0,0)) >::type SX;
524  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
525 
526  const IndexType N = static_cast<IndexType> (Y.size ());
527  const SX* const X_raw = X.ptr_on_device ();
528  SY* const Y_raw = Y.ptr_on_device ();
529 
530  // BLAS _SCAL doesn't check whether alpha is 0.
531  for (IndexType i = 0; i < N; ++i) {
532  Y_raw[i] = X_raw[i];
533  }
534  }
535 };
536 
540 template<class ViewType1,
541  class ViewType2,
542  class IndexType>
543 struct COPY<ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
545  static KOKKOS_INLINE_FUNCTION void
546  run (const ViewType1& X, const ViewType2& Y)
547  {
548  typedef typename std::decay<decltype (X(0,0)) >::type SX;
549  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
550 
551  const IndexType N = static_cast<IndexType> (Y.size ());
552  const SX* const X_raw = X.ptr_on_device ();
553  SY* const Y_raw = Y.ptr_on_device ();
554 
555  // BLAS _SCAL doesn't check whether alpha is 0.
556  for (IndexType i = 0; i < N; ++i) {
557  Y_raw[i] = X_raw[i];
558  }
559  }
560 };
561 
562 
563 template<class VecType1,
564  class BlkType,
565  class VecType2,
566  class CoeffType,
567  class IndexType = int,
568  class VecLayoutType1 = typename VecType1::array_layout,
569  class BlkLayoutType = typename BlkType::array_layout,
570  class VecLayoutType2 = typename VecType2::array_layout>
571 struct GEMV {
572  static KOKKOS_INLINE_FUNCTION void
573  run (const CoeffType& alpha,
574  const BlkType& A,
575  const VecType1& x,
576  const VecType2& y)
577  {
578  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
579  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
580  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
581  typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
582 
583  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
584  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
585 
586  for (IndexType i = 0; i < numRows; ++i) {
587  y_elt_type y_i = y(i);
588  for (IndexType j = 0; j < numCols; ++j) {
589  y_i += alpha * A(i,j) * x(j);
590  }
591  y(i) = y_i;
592  }
593  }
594 };
595 
596 template<class VecType1,
597  class BlkType,
598  class VecType2,
599  class CoeffType,
600  class IndexType>
601 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
602  Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight>
603 {
604  static KOKKOS_INLINE_FUNCTION void
605  run (const CoeffType& alpha,
606  const BlkType& A,
607  const VecType1& x,
608  const VecType2& y)
609  {
610  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
611  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
612  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
613  typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
614  typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
615 
616  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
617  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
618  const A_elt_type* const A_raw = A.ptr_on_device ();
619 
620  for (IndexType i = 0; i < numRows; ++i) {
621  y_elt_type y_i = y(i);
622  const A_elt_type* const A_i = A_raw + i*numCols;
623  for (IndexType j = 0; j < numCols; ++j) {
624  y_i += alpha * A_i[j] * x(j);
625  }
626  y(i) = y_i;
627  }
628  }
629 };
630 
631 template<class VecType1,
632  class BlkType,
633  class VecType2,
634  class CoeffType,
635  class IndexType>
636 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
637  Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft>
638 {
639  static KOKKOS_INLINE_FUNCTION void
640  run (const CoeffType& alpha,
641  const BlkType& A,
642  const VecType1& x,
643  const VecType2& y)
644  {
645  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
646  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
647  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
648  typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
649 
650  const A_elt_type* const A_raw = A.ptr_on_device ();
651  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
652  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
653  for (IndexType j = 0; j < numCols; ++j) {
654  const A_elt_type* const A_j = A_raw + j*numRows;
655  for (IndexType i = 0; i < numRows; ++i) {
656  y(i) += alpha * A_j[i] * x(i);
657  }
658  }
659  }
660 };
661 
662 } // namespace Impl
663 
666 template<class ViewType,
667  class CoefficientType,
668  class LayoutType = typename ViewType::array_layout,
669  class IndexType = int,
670  const int rank = ViewType::rank>
671 KOKKOS_INLINE_FUNCTION void
672 SCAL (const CoefficientType& alpha, const ViewType& x)
673 {
675 }
676 
678 template<class ViewType,
679  class InputType,
680  class LayoutType = typename ViewType::array_layout,
681  class IndexType = int,
682  const int rank = ViewType::rank>
683 KOKKOS_INLINE_FUNCTION void
684 FILL (const ViewType& x, const InputType& val)
685 {
687 }
688 
694 template<class CoefficientType,
695  class ViewType1,
696  class ViewType2,
697  class LayoutType1 = typename ViewType1::array_layout,
698  class LayoutType2 = typename ViewType2::array_layout,
699  class IndexType = int,
700  const int rank = ViewType1::rank>
701 KOKKOS_INLINE_FUNCTION void
702 AXPY (const CoefficientType& alpha,
703  const ViewType1& x,
704  const ViewType2& y)
705 {
706  static_assert (static_cast<int> (ViewType1::rank) ==
707  static_cast<int> (ViewType2::rank),
708  "AXPY: x and y must have the same rank.");
709  Impl::AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2,
710  IndexType, rank>::run (alpha, x, y);
711 }
712 
721 template<class ViewType1,
722  class ViewType2,
723  class LayoutType1 = typename ViewType1::array_layout,
724  class LayoutType2 = typename ViewType2::array_layout,
725  class IndexType = int,
726  const int rank = ViewType1::rank>
727 KOKKOS_INLINE_FUNCTION void
728 COPY (const ViewType1& x, const ViewType2& y)
729 {
730  static_assert (static_cast<int> (ViewType1::rank) ==
731  static_cast<int> (ViewType2::rank),
732  "COPY: x and y must have the same rank.");
733  Impl::COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType,
734  rank>::run (x, y);
735 }
736 
748 template<class VecType1,
749  class BlkType,
750  class VecType2,
751  class CoeffType,
752  class IndexType = int>
753 KOKKOS_INLINE_FUNCTION void
754 GEMV (const CoeffType& alpha,
755  const BlkType& A,
756  const VecType1& x,
757  const VecType2& y)
758 {
759  Impl::GEMV<VecType1, BlkType, VecType2, CoeffType,
760  IndexType>::run (alpha, A, x, y);
761 }
762 
770 template<class ViewType1,
771  class ViewType2,
772  class ViewType3,
773  class CoefficientType,
774  class IndexType = int>
775 KOKKOS_INLINE_FUNCTION void
776 GEMM (const char transA[],
777  const char transB[],
778  const CoefficientType& alpha,
779  const ViewType1& A,
780  const ViewType2& B,
781  const CoefficientType& beta,
782  const ViewType3& C)
783 {
784  // Assert that A, B, and C are in fact matrices
785  static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
786  static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
787  static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
788 
789  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
790  typedef Kokkos::Details::ArithTraits<Scalar> STS;
791  const Scalar ZERO = STS::zero();
792  const Scalar ONE = STS::one();
793 
794  // Get the dimensions
795  IndexType m, n, k;
796  if(transA[0] == 'N' || transA[0] == 'n') {
797  m = static_cast<IndexType> (A.dimension_0 ());
798  n = static_cast<IndexType> (A.dimension_1 ());
799  }
800  else {
801  m = static_cast<IndexType> (A.dimension_1 ());
802  n = static_cast<IndexType> (A.dimension_0 ());
803  }
804  k = static_cast<IndexType> (C.dimension_1 ());
805 
806  // quick return if possible
807  if(alpha == ZERO && beta == ONE)
808  return;
809 
810  // And if alpha equals zero...
811  if(alpha == ZERO) {
812  if(beta == ZERO) {
813  for(IndexType i=0; i<m; i++) {
814  for(IndexType j=0; j<k; j++) {
815  C(i,j) = ZERO;
816  }
817  }
818  }
819  else {
820  for(IndexType i=0; i<m; i++) {
821  for(IndexType j=0; j<k; j++) {
822  C(i,j) = beta*C(i,j);
823  }
824  }
825  }
826  }
827 
828  // Start the operations
829  if(transB[0] == 'n' || transB[0] == 'N') {
830  if(transA[0] == 'n' || transA[0] == 'N') {
831  // Form C = alpha*A*B + beta*C
832  for(IndexType j=0; j<n; j++) {
833  if(beta == ZERO) {
834  for(IndexType i=0; i<m; i++) {
835  C(i,j) = ZERO;
836  }
837  }
838  else if(beta != ONE) {
839  for(IndexType i=0; i<m; i++) {
840  C(i,j) = beta*C(i,j);
841  }
842  }
843  for(IndexType l=0; l<k; l++) {
844  Scalar temp = alpha*B(l,j);
845  for(IndexType i=0; i<m; i++) {
846  C(i,j) = C(i,j) + temp*A(i,l);
847  }
848  }
849  }
850  }
851  else {
852  // Form C = alpha*A**T*B + beta*C
853  for(IndexType j=0; j<n; j++) {
854  for(IndexType i=0; i<m; i++) {
855  Scalar temp = ZERO;
856  for(IndexType l=0; l<k; l++) {
857  temp = temp + A(l,i)*B(l,j);
858  }
859  if(beta == ZERO) {
860  C(i,j) = alpha*temp;
861  }
862  else {
863  C(i,j) = alpha*temp + beta*C(i,j);
864  }
865  }
866  }
867  }
868  }
869  else {
870  if(transA[0] == 'n' || transA[0] == 'N') {
871  // Form C = alpha*A*B**T + beta*C
872  for(IndexType j=0; j<n; j++) {
873  if(beta == ZERO) {
874  for(IndexType i=0; i<m; i++) {
875  C(i,j) = ZERO;
876  }
877  }
878  else if(beta != ONE) {
879  for(IndexType i=0; i<m; i++) {
880  C(i,j) = beta*C(i,j);
881  }
882  }
883  for(IndexType l=0; l<k; l++) {
884  Scalar temp = alpha*B(j,l);
885  for(IndexType i=0; i<m; i++) {
886  C(i,j) = C(i,j) + temp*A(i,l);
887  }
888  }
889  }
890  }
891  else {
892  // Form C = alpha*A**T*B**T + beta*C
893  for(IndexType j=0; j<n; j++) {
894  for(IndexType i=0; i<m; i++) {
895  Scalar temp = ZERO;
896  for(IndexType l=0; l<k; l++) {
897  temp = temp + A(l,i)*B(j,l);
898  }
899  if(beta == ZERO) {
900  C(i,j) = alpha*temp;
901  }
902  else {
903  C(i,j) = alpha*temp + beta*C(i,j);
904  }
905  }
906  }
907  }
908  }
909 }
910 
912 template<class LittleBlockType,
913  class LittleVectorType>
914 KOKKOS_INLINE_FUNCTION void
915 GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
916 {
917  // The type of an entry of ipiv is the index type.
918  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
919  static_assert (std::is_integral<IndexType>::value,
920  "GETF2: The type of each entry of ipiv must be an integer type.");
921  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
922  static_assert (! std::is_const<Scalar>::value,
923  "GETF2: A must not be a const View (or LittleBlock).");
924  static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
925  "GETF2: ipiv must not be a const View (or LittleBlock).");
926  static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
927  typedef Kokkos::Details::ArithTraits<Scalar> STS;
928  const Scalar ZERO = STS::zero();
929 
930  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
931  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
932  const IndexType pivDim = static_cast<IndexType> (ipiv.dimension_0 ());
933 
934  // std::min is not a CUDA device function
935  const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
936  if (pivDim < minPivDim) {
937  info = -2;
938  return;
939  }
940 
941  // Initialize info
942  info = 0;
943 
944  for(IndexType j=0; j < pivDim; j++)
945  {
946  // Find pivot and test for singularity
947  IndexType jp = j;
948  for(IndexType i=j+1; i<numRows; i++)
949  {
950  if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
951  jp = i;
952  }
953  }
954  ipiv(j) = jp+1;
955 
956  if(A(jp,j) != ZERO)
957  {
958  // Apply the interchange to columns 1:N
959  if(jp != j)
960  {
961  for(IndexType i=0; i < numCols; i++)
962  {
963  Scalar temp = A(jp,i);
964  A(jp,i) = A(j,i);
965  A(j,i) = temp;
966  }
967  }
968 
969  // Compute elements J+1:M of J-th column
970  for(IndexType i=j+1; i<numRows; i++) {
971  A(i,j) = A(i,j) / A(j,j);
972  }
973  }
974  else if(info == 0) {
975  info = j;
976  }
977 
978  // Update trailing submatrix
979  for(IndexType r=j+1; r < numRows; r++)
980  {
981  for(IndexType c=j+1; c < numCols; c++) {
982  A(r,c) = A(r,c) - A(r,j) * A(j,c);
983  }
984  }
985  }
986 }
987 
988 namespace Impl {
989 
993 template<class LittleBlockType,
994  class LittleIntVectorType,
995  class LittleScalarVectorType,
996  const int rank = LittleScalarVectorType::rank>
997 struct GETRS {
998  static KOKKOS_INLINE_FUNCTION void
999  run (const char mode[],
1000  const LittleBlockType& A,
1001  const LittleIntVectorType& ipiv,
1002  const LittleScalarVectorType& B,
1003  int& info);
1004 };
1005 
1007 template<class LittleBlockType,
1008  class LittleIntVectorType,
1009  class LittleScalarVectorType>
1010 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
1011  static KOKKOS_INLINE_FUNCTION void
1012  run (const char mode[],
1013  const LittleBlockType& A,
1014  const LittleIntVectorType& ipiv,
1015  const LittleScalarVectorType& B,
1016  int& info)
1017  {
1018  // The type of an entry of ipiv is the index type.
1019  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1020  // IndexType must be signed, because this code does a countdown loop
1021  // to zero. Unsigned integers are always >= 0, even on underflow.
1022  static_assert (std::is_integral<IndexType>::value &&
1023  std::is_signed<IndexType>::value,
1024  "GETRS: The type of each entry of ipiv must be a signed integer.");
1025  typedef typename std::decay<decltype (A(0,0))>::type Scalar;
1026  static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
1027  "GETRS: B must not be a const View (or LittleBlock).");
1028  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1029  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1030  static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
1031 
1032  typedef Kokkos::Details::ArithTraits<Scalar> STS;
1033  const Scalar ZERO = STS::zero();
1034  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
1035  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
1036  const IndexType pivDim = static_cast<IndexType> (ipiv.dimension_0 ());
1037 
1038  info = 0;
1039 
1040  // Ensure that the matrix is square
1041  if (numRows != numCols) {
1042  info = -2;
1043  return;
1044  }
1045 
1046  // Ensure that the pivot array is sufficiently large
1047  if (pivDim < numRows) {
1048  info = -3;
1049  return;
1050  }
1051 
1052  // No transpose case
1053  if(mode[0] == 'n' || mode[0] == 'N') {
1054  // Apply row interchanges to the RHS
1055  for(IndexType i=0; i<numRows; i++) {
1056  if(ipiv(i) != i+1) {
1057  Scalar temp = B(i);
1058  B(i) = B(ipiv(i)-1);
1059  B(ipiv(i)-1) = temp;
1060  }
1061  }
1062 
1063  // Solve Lx=b, overwriting b with x
1064  for(IndexType r=1; r < numRows; r++) {
1065  for(IndexType c=0; c < r; c++) {
1066  B(r) = B(r) - A(r,c)*B(c);
1067  }
1068  }
1069 
1070  // Solve Ux=b, overwriting b with x
1071  for(IndexType r=numRows-1; r >= 0; r--) {
1072  // Check whether U is singular
1073  if(A(r,r) == ZERO) {
1074  info = r+1;
1075  return;
1076  }
1077 
1078  for(IndexType c=r+1; c < numCols; c++) {
1079  B(r) = B(r) - A(r,c)*B(c);
1080  }
1081  B(r) = B(r) / A(r,r);
1082  }
1083  }
1084  // Transpose case
1085  else if(mode[0] == 't' || mode[0] == 'T') {
1086  info = -1; // NOT YET IMPLEMENTED
1087  return;
1088  }
1089  // Conjugate transpose case
1090  else if (mode[0] == 'c' || mode[0] == 'C') {
1091  info = -1; // NOT YET IMPLEMENTED
1092  return;
1093  }
1094  else { // invalid mode
1095  info = -1;
1096  return;
1097  }
1098  }
1099 };
1100 
1101 
1103 template<class LittleBlockType,
1104  class LittleIntVectorType,
1105  class LittleScalarVectorType>
1106 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1107  static KOKKOS_INLINE_FUNCTION void
1108  run (const char mode[],
1109  const LittleBlockType& A,
1110  const LittleIntVectorType& ipiv,
1111  const LittleScalarVectorType& B,
1112  int& info)
1113  {
1114  // The type of an entry of ipiv is the index type.
1115  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1116  static_assert (std::is_integral<IndexType>::value,
1117  "GETRS: The type of each entry of ipiv must be an integer type.");
1118  static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1119  "GETRS: B must not be a const View (or LittleBlock).");
1120  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1121  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1122  static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1123 
1124  // The current implementation iterates over one right-hand side at
1125  // a time. It might be faster to do this differently, but this
1126  // should work for now.
1127  const IndexType numRhs = B.dimension_1 ();
1128  info = 0;
1129 
1130  for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1131  auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1133  if (info != 0) {
1134  return;
1135  }
1136  }
1137  }
1138 };
1139 
1140 } // namespace Impl
1141 
1145 template<class LittleBlockType,
1146  class LittleIntVectorType,
1147  class LittleScalarVectorType>
1148 KOKKOS_INLINE_FUNCTION void
1149 GETRS (const char mode[],
1150  const LittleBlockType& A,
1151  const LittleIntVectorType& ipiv,
1152  const LittleScalarVectorType& B,
1153  int& info)
1154 {
1155  Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1156  LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1157 }
1158 
1159 
1174 template<class LittleBlockType,
1175  class LittleIntVectorType,
1176  class LittleScalarVectorType>
1177 KOKKOS_INLINE_FUNCTION void
1178 GETRI (const LittleBlockType& A,
1179  const LittleIntVectorType& ipiv,
1180  const LittleScalarVectorType& work,
1181  int& info)
1182 {
1183  // The type of an entry of ipiv is the index type.
1184  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1185  // IndexType must be signed, because this code does a countdown loop
1186  // to zero. Unsigned integers are always >= 0, even on underflow.
1187  static_assert (std::is_integral<IndexType>::value &&
1188  std::is_signed<IndexType>::value,
1189  "GETRI: The type of each entry of ipiv must be a signed integer.");
1190  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1191  static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1192  "GETRI: A must not be a const View (or LittleBlock).");
1193  static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1194  "GETRI: work must not be a const View (or LittleBlock).");
1195  static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1196  typedef Kokkos::Details::ArithTraits<Scalar> STS;
1197  const Scalar ZERO = STS::zero();
1198  const Scalar ONE = STS::one();
1199 
1200  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
1201  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
1202  const IndexType pivDim = static_cast<IndexType> (ipiv.dimension_0 ());
1203  const IndexType workDim = static_cast<IndexType> (work.dimension_0 ());
1204 
1205  info = 0;
1206 
1207  // Ensure that the matrix is square
1208  if (numRows != numCols) {
1209  info = -1;
1210  return;
1211  }
1212 
1213  // Ensure that the pivot array is sufficiently large
1214  if (pivDim < numRows) {
1215  info = -2;
1216  return;
1217  }
1218 
1219  // Ensure that the work array is sufficiently large
1220  if (workDim < numRows) {
1221  info = -3;
1222  return;
1223  }
1224 
1225  // Form Uinv in place
1226  for(IndexType j=0; j < numRows; j++) {
1227  if(A(j,j) == ZERO) {
1228  info = j+1;
1229  return;
1230  }
1231 
1232  A(j,j) = ONE / A(j,j);
1233 
1234  // Compute elements 1:j-1 of j-th column
1235  for(IndexType r=0; r < j; r++) {
1236  A(r,j) = A(r,r)*A(r,j);
1237  for(IndexType c=r+1; c < j; c++) {
1238  A(r,j) = A(r,j) + A(r,c)*A(c,j);
1239  }
1240  }
1241  for(IndexType r=0; r < j; r++) {
1242  A(r,j) = -A(j,j)*A(r,j);
1243  }
1244  }
1245 
1246  // Compute Ainv by solving A\L = Uinv
1247  for(IndexType j = numCols-2; j >= 0; j--) {
1248  // Copy lower triangular data to work array and replace with 0
1249  for(IndexType r=j+1; r < numRows; r++) {
1250  work(r) = A(r,j);
1251  A(r,j) = 0;
1252  }
1253 
1254  for(IndexType r=0; r < numRows; r++) {
1255  for(IndexType i=j+1; i < numRows; i++) {
1256  A(r,j) = A(r,j) - work(i)*A(r,i);
1257  }
1258  }
1259  }
1260 
1261  // Apply column interchanges
1262  for(IndexType j=numRows-1; j >= 0; j--) {
1263  IndexType jp = ipiv(j)-1;
1264  if(j != jp) {
1265  for(IndexType r=0; r < numRows; r++) {
1266  Scalar temp = A(r,j);
1267  A(r,j) = A(r,jp);
1268  A(r,jp) = temp;
1269  }
1270  }
1271  }
1272 }
1273 
1274 
1275 // mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1276 // an implementation for trans != 'N' (the transpose and conjugate
1277 // transpose cases).
1278 #if 0
1279 template<class LittleBlockType,
1280  class LittleVectorTypeX,
1281  class LittleVectorTypeY,
1282  class CoefficientType,
1283  class IndexType = int>
1284 KOKKOS_INLINE_FUNCTION void
1285 GEMV (const char trans,
1286  const CoefficientType& alpha,
1287  const LittleBlockType& A,
1288  const LittleVectorTypeX& x,
1289  const CoefficientType& beta,
1290  const LittleVectorTypeY& y)
1291 {
1292  // y(0) returns a reference to the 0-th entry of y. Remove that
1293  // reference to get the type of each entry of y. It's OK if y has
1294  // zero entries -- this doesn't actually do y(i), it just returns
1295  // the type of that expression.
1296  typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1297  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
1298  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
1299 
1300  if (beta == 0.0) {
1301  if (alpha == 0.0) {
1302  for (IndexType i = 0; i < numRows; ++i) {
1303  y(i) = 0.0;
1304  }
1305  }
1306  else {
1307  for (IndexType i = 0; i < numRows; ++i) {
1308  y_value_type y_i = 0.0;
1309  for (IndexType j = 0; j < numCols; ++j) {
1310  y_i += A(i,j) * x(j);
1311  }
1312  y(i) = y_i;
1313  }
1314  }
1315  }
1316  else { // beta != 0
1317  if (alpha == 0.0) {
1318  if (beta == 0.0) {
1319  for (IndexType i = 0; i < numRows; ++i) {
1320  y(i) = 0.0;
1321  }
1322  }
1323  else {
1324  for (IndexType i = 0; i < numRows; ++i) {
1325  y(i) *= beta;
1326  }
1327  }
1328  }
1329  else {
1330  for (IndexType i = 0; i < numRows; ++i) {
1331  y_value_type y_i = beta * y(i);
1332  for (IndexType j = 0; j < numCols; ++j) {
1333  y_i += alpha * A(i,j) * x(j);
1334  }
1335  y(i) = y_i;
1336  }
1337  }
1338  }
1339 }
1340 
1341 #endif // 0
1342 
1343 } // namespace Experimental
1344 } // namespace Tpetra
1345 
1346 #endif // TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
Implementation of Tpetra::Experimental::FILL function.
Implementation of Tpetra::Experimental::SCAL function.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
Implementation of Tpetra::Experimental::COPY function.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::Experimental::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)