42 #ifndef TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP 43 #define TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP 53 #include "TpetraCore_config.h" 54 #include "Kokkos_ArithTraits.hpp" 55 #include "Kokkos_Complex.hpp" 70 namespace Experimental {
80 template<
class ViewType1,
82 const int rank1 = ViewType1::rank>
84 static KOKKOS_INLINE_FUNCTION
void 85 run (
const ViewType2& Y,
const ViewType1& X);
92 template<
class ViewType1,
94 struct AbsMax<ViewType1, ViewType2, 2> {
97 static KOKKOS_INLINE_FUNCTION
void 98 run (
const ViewType2& Y,
const ViewType1& X)
100 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
102 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
103 static_assert(! std::is_const<STY>::value,
104 "AbsMax: The type of each entry of Y must be nonconst.");
105 typedef typename std::decay<decltype (X(0,0)) >::type STX;
106 static_assert( std::is_same<STX, STY>::value,
107 "AbsMax: The type of each entry of X and Y must be the same.");
108 typedef Kokkos::Details::ArithTraits<STY> KAT;
110 const int numCols = Y.dimension_1 ();
111 const int numRows = Y.dimension_0 ();
112 for (
int j = 0; j < numCols; ++j) {
113 for (
int i = 0; i < numRows; ++i) {
115 const STX X_ij = X(i,j);
118 const auto Y_ij_abs = KAT::abs (Y_ij);
119 const auto X_ij_abs = KAT::abs (X_ij);
120 Y_ij = (Y_ij_abs >= X_ij_abs) ?
121 static_cast<STY> (Y_ij_abs) :
122 static_cast<STY
> (X_ij_abs);
132 template<
class ViewType1,
137 static KOKKOS_INLINE_FUNCTION
void 138 run (
const ViewType2& Y,
const ViewType1& X)
140 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
141 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
143 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
144 static_assert(! std::is_const<STY>::value,
145 "AbsMax: The type of each entry of Y must be nonconst.");
146 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
147 static_assert( std::is_same<STX, STY>::value,
148 "AbsMax: The type of each entry of X and Y must be the same.");
149 typedef Kokkos::Details::ArithTraits<STY> KAT;
151 const int numRows = Y.dimension_0 ();
152 for (
int i = 0; i < numRows; ++i) {
154 const STX X_i = X(i);
157 const auto Y_i_abs = KAT::abs (Y_i);
158 const auto X_i_abs = KAT::abs (X_i);
159 Y_i = (Y_i_abs >= X_i_abs) ?
160 static_cast<STY> (Y_i_abs) :
161 static_cast<STY
> (X_i_abs);
172 template<
class ViewType1,
class ViewType2, const
int rank = ViewType1::rank>
173 KOKKOS_INLINE_FUNCTION
void 174 absMax (
const ViewType2& Y,
const ViewType1& X)
176 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
177 "absMax: ViewType1 and ViewType2 must have the same rank.");
185 template<
class ViewType,
186 class CoefficientType,
187 class LayoutType =
typename ViewType::array_layout,
188 class IndexType = int,
189 const int rank = ViewType::rank>
191 static KOKKOS_INLINE_FUNCTION
void 192 run (
const CoefficientType& alpha,
const ViewType& x);
197 template<
class ViewType,
198 class CoefficientType,
201 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
203 static KOKKOS_INLINE_FUNCTION
void 204 run (
const CoefficientType& alpha,
const ViewType& x)
206 const IndexType numRows =
static_cast<IndexType
> (x.dimension_0 ());
208 for (IndexType i = 0; i < numRows; ++i) {
216 template<
class ViewType,
217 class CoefficientType,
220 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
222 static KOKKOS_INLINE_FUNCTION
void 223 run (
const CoefficientType& alpha,
const ViewType& A)
225 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
226 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
229 for (IndexType i = 0; i < numRows; ++i) {
230 for (IndexType j = 0; j < numCols; ++j) {
231 A(i,j) = alpha * A(i,j);
242 template<
class ViewType,
243 class CoefficientType,
245 struct SCAL<ViewType, CoefficientType,
Kokkos::LayoutRight, IndexType, 2> {
247 static KOKKOS_INLINE_FUNCTION
void 248 run (
const CoefficientType& alpha,
const ViewType& A)
250 const IndexType N = A.size ();
251 typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
252 scalar_type*
const A_raw = A.ptr_on_device ();
254 for (IndexType i = 0; i < N; ++i) {
255 A_raw[i] = alpha * A_raw[i];
265 template<
class ViewType,
267 class LayoutType =
typename ViewType::array_layout,
268 class IndexType = int,
269 const int rank = ViewType::rank>
271 static KOKKOS_INLINE_FUNCTION
void 272 run (
const ViewType& x,
const InputType& val);
277 template<
class ViewType,
281 struct FILL<ViewType, InputType, LayoutType, IndexType, 1> {
282 static KOKKOS_INLINE_FUNCTION
void 283 run (
const ViewType& x,
const InputType& val)
285 const IndexType numRows =
static_cast<IndexType
> (x.dimension_0 ());
286 for (IndexType i = 0; i < numRows; ++i) {
294 template<
class ViewType,
298 struct FILL<ViewType, InputType, LayoutType, IndexType, 2> {
299 static KOKKOS_INLINE_FUNCTION
void 300 run (
const ViewType& X,
const InputType& val)
302 const IndexType numRows =
static_cast<IndexType
> (X.dimension_0 ());
303 const IndexType numCols =
static_cast<IndexType
> (X.dimension_1 ());
304 for (IndexType j = 0; j < numCols; ++j) {
305 for (IndexType i = 0; i < numRows; ++i) {
316 template<
class CoefficientType,
319 class LayoutType1 =
typename ViewType1::array_layout,
320 class LayoutType2 =
typename ViewType2::array_layout,
321 class IndexType = int,
322 const int rank = ViewType1::rank>
324 static KOKKOS_INLINE_FUNCTION
void 325 run (
const CoefficientType& alpha,
332 template<
class CoefficientType,
338 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
340 static KOKKOS_INLINE_FUNCTION
void 341 run (
const CoefficientType& alpha,
345 using Kokkos::Details::ArithTraits;
346 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
347 "AXPY: x and y must have the same rank.");
349 const IndexType numRows =
static_cast<IndexType
> (y.dimension_0 ());
350 if (alpha != ArithTraits<CoefficientType>::zero ()) {
351 for (IndexType i = 0; i < numRows; ++i) {
352 y(i) += alpha * x(i);
360 template<
class CoefficientType,
366 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
368 static KOKKOS_INLINE_FUNCTION
void 369 run (
const CoefficientType& alpha,
373 using Kokkos::Details::ArithTraits;
374 static_assert (ViewType1::rank == ViewType2::rank,
375 "AXPY: X and Y must have the same rank.");
376 const IndexType numRows =
static_cast<IndexType
> (Y.dimension_0 ());
377 const IndexType numCols =
static_cast<IndexType
> (Y.dimension_1 ());
379 if (alpha != ArithTraits<CoefficientType>::zero ()) {
380 for (IndexType i = 0; i < numRows; ++i) {
381 for (IndexType j = 0; j < numCols; ++j) {
382 Y(i,j) += alpha * X(i,j);
392 template<
class CoefficientType,
396 struct AXPY<CoefficientType, ViewType1, ViewType2,
Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
398 static KOKKOS_INLINE_FUNCTION
void 399 run (
const CoefficientType& alpha,
403 using Kokkos::Details::ArithTraits;
404 static_assert (static_cast<int> (ViewType1::rank) ==
405 static_cast<int> (ViewType2::rank),
406 "AXPY: X and Y must have the same rank.");
407 typedef typename std::decay<decltype (X(0,0)) >::type SX;
408 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
410 const IndexType N =
static_cast<IndexType
> (Y.size ());
411 const SX*
const X_raw = X.ptr_on_device ();
412 SY*
const Y_raw = Y.ptr_on_device ();
414 if (alpha != ArithTraits<CoefficientType>::zero ()) {
415 for (IndexType i = 0; i < N; ++i) {
416 Y_raw[i] += alpha * X_raw[i];
425 template<
class CoefficientType,
429 struct AXPY<CoefficientType, ViewType1, ViewType2,
Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
431 static KOKKOS_INLINE_FUNCTION
void 432 run (
const CoefficientType& alpha,
436 using Kokkos::Details::ArithTraits;
437 static_assert (ViewType1::rank == ViewType2::rank,
438 "AXPY: X and Y must have the same rank.");
439 typedef typename std::decay<decltype (X(0,0)) >::type SX;
440 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
442 const IndexType N =
static_cast<IndexType
> (Y.size ());
443 const SX*
const X_raw = X.ptr_on_device ();
444 SY*
const Y_raw = Y.ptr_on_device ();
446 if (alpha != ArithTraits<CoefficientType>::zero ()) {
447 for (IndexType i = 0; i < N; ++i) {
448 Y_raw[i] += alpha * X_raw[i];
458 template<
class ViewType1,
460 class LayoutType1 =
typename ViewType1::array_layout,
461 class LayoutType2 =
typename ViewType2::array_layout,
462 class IndexType = int,
463 const int rank = ViewType1::rank>
465 static KOKKOS_INLINE_FUNCTION
void 466 run (
const ViewType1& x,
const ViewType2& y);
471 template<
class ViewType1,
476 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
478 static KOKKOS_INLINE_FUNCTION
void 479 run (
const ViewType1& x,
const ViewType2& y)
481 const IndexType numRows =
static_cast<IndexType
> (x.dimension_0 ());
482 for (IndexType i = 0; i < numRows; ++i) {
490 template<
class ViewType1,
495 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
497 static KOKKOS_INLINE_FUNCTION
void 498 run (
const ViewType1& X,
const ViewType2& Y)
500 const IndexType numRows =
static_cast<IndexType
> (Y.dimension_0 ());
501 const IndexType numCols =
static_cast<IndexType
> (Y.dimension_1 ());
504 for (IndexType i = 0; i < numRows; ++i) {
505 for (IndexType j = 0; j < numCols; ++j) {
515 template<
class ViewType1,
518 struct COPY<ViewType1, ViewType2,
Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
520 static KOKKOS_INLINE_FUNCTION
void 521 run (
const ViewType1& X,
const ViewType2& Y)
523 typedef typename std::decay<decltype (X(0,0)) >::type SX;
524 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
526 const IndexType N =
static_cast<IndexType
> (Y.size ());
527 const SX*
const X_raw = X.ptr_on_device ();
528 SY*
const Y_raw = Y.ptr_on_device ();
531 for (IndexType i = 0; i < N; ++i) {
540 template<
class ViewType1,
543 struct COPY<ViewType1, ViewType2,
Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
545 static KOKKOS_INLINE_FUNCTION
void 546 run (
const ViewType1& X,
const ViewType2& Y)
548 typedef typename std::decay<decltype (X(0,0)) >::type SX;
549 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
551 const IndexType N =
static_cast<IndexType
> (Y.size ());
552 const SX*
const X_raw = X.ptr_on_device ();
553 SY*
const Y_raw = Y.ptr_on_device ();
556 for (IndexType i = 0; i < N; ++i) {
563 template<
class VecType1,
567 class IndexType = int,
568 class VecLayoutType1 =
typename VecType1::array_layout,
569 class BlkLayoutType =
typename BlkType::array_layout,
570 class VecLayoutType2 =
typename VecType2::array_layout>
572 static KOKKOS_INLINE_FUNCTION
void 573 run (
const CoeffType& alpha,
578 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
579 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
580 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
581 typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
583 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
584 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
586 for (IndexType i = 0; i < numRows; ++i) {
587 y_elt_type y_i = y(i);
588 for (IndexType j = 0; j < numCols; ++j) {
589 y_i += alpha * A(i,j) * x(j);
596 template<
class VecType1,
601 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
602 Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight>
604 static KOKKOS_INLINE_FUNCTION
void 605 run (
const CoeffType& alpha,
610 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
611 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
612 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
613 typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
614 typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
616 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
617 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
618 const A_elt_type*
const A_raw = A.ptr_on_device ();
620 for (IndexType i = 0; i < numRows; ++i) {
621 y_elt_type y_i = y(i);
622 const A_elt_type*
const A_i = A_raw + i*numCols;
623 for (IndexType j = 0; j < numCols; ++j) {
624 y_i += alpha * A_i[j] * x(j);
631 template<
class VecType1,
636 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
637 Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft>
639 static KOKKOS_INLINE_FUNCTION
void 640 run (
const CoeffType& alpha,
645 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
646 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
647 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
648 typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
650 const A_elt_type*
const A_raw = A.ptr_on_device ();
651 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
652 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
653 for (IndexType j = 0; j < numCols; ++j) {
654 const A_elt_type*
const A_j = A_raw + j*numRows;
655 for (IndexType i = 0; i < numRows; ++i) {
656 y(i) += alpha * A_j[i] * x(i);
666 template<
class ViewType,
667 class CoefficientType,
668 class LayoutType =
typename ViewType::array_layout,
669 class IndexType = int,
670 const int rank = ViewType::rank>
671 KOKKOS_INLINE_FUNCTION
void 672 SCAL (
const CoefficientType& alpha,
const ViewType& x)
678 template<
class ViewType,
680 class LayoutType =
typename ViewType::array_layout,
681 class IndexType = int,
682 const int rank = ViewType::rank>
683 KOKKOS_INLINE_FUNCTION
void 684 FILL (
const ViewType& x,
const InputType& val)
694 template<
class CoefficientType,
697 class LayoutType1 =
typename ViewType1::array_layout,
698 class LayoutType2 =
typename ViewType2::array_layout,
699 class IndexType = int,
700 const int rank = ViewType1::rank>
701 KOKKOS_INLINE_FUNCTION
void 702 AXPY (
const CoefficientType& alpha,
706 static_assert (static_cast<int> (ViewType1::rank) ==
707 static_cast<int> (ViewType2::rank),
708 "AXPY: x and y must have the same rank.");
709 Impl::AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2,
710 IndexType, rank>::run (alpha, x, y);
721 template<
class ViewType1,
723 class LayoutType1 =
typename ViewType1::array_layout,
724 class LayoutType2 =
typename ViewType2::array_layout,
725 class IndexType = int,
726 const int rank = ViewType1::rank>
727 KOKKOS_INLINE_FUNCTION
void 728 COPY (
const ViewType1& x,
const ViewType2& y)
730 static_assert (static_cast<int> (ViewType1::rank) ==
731 static_cast<int> (ViewType2::rank),
732 "COPY: x and y must have the same rank.");
733 Impl::COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType,
748 template<
class VecType1,
752 class IndexType =
int>
753 KOKKOS_INLINE_FUNCTION
void 759 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType,
760 IndexType>::run (alpha, A, x, y);
770 template<
class ViewType1,
773 class CoefficientType,
774 class IndexType =
int>
775 KOKKOS_INLINE_FUNCTION
void 778 const CoefficientType& alpha,
781 const CoefficientType& beta,
785 static_assert (ViewType1::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
786 static_assert (ViewType2::rank == 2,
"GEMM: B must have rank 2 (be a matrix).");
787 static_assert (ViewType3::rank == 2,
"GEMM: C must have rank 2 (be a matrix).");
789 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
790 typedef Kokkos::Details::ArithTraits<Scalar> STS;
791 const Scalar
ZERO = STS::zero();
792 const Scalar ONE = STS::one();
796 if(transA[0] ==
'N' || transA[0] ==
'n') {
797 m =
static_cast<IndexType
> (A.dimension_0 ());
798 n =
static_cast<IndexType
> (A.dimension_1 ());
801 m =
static_cast<IndexType
> (A.dimension_1 ());
802 n =
static_cast<IndexType
> (A.dimension_0 ());
804 k =
static_cast<IndexType
> (C.dimension_1 ());
807 if(alpha == ZERO && beta == ONE)
813 for(IndexType i=0; i<m; i++) {
814 for(IndexType j=0; j<k; j++) {
820 for(IndexType i=0; i<m; i++) {
821 for(IndexType j=0; j<k; j++) {
822 C(i,j) = beta*C(i,j);
829 if(transB[0] ==
'n' || transB[0] ==
'N') {
830 if(transA[0] ==
'n' || transA[0] ==
'N') {
832 for(IndexType j=0; j<n; j++) {
834 for(IndexType i=0; i<m; i++) {
838 else if(beta != ONE) {
839 for(IndexType i=0; i<m; i++) {
840 C(i,j) = beta*C(i,j);
843 for(IndexType l=0; l<k; l++) {
844 Scalar temp = alpha*B(l,j);
845 for(IndexType i=0; i<m; i++) {
846 C(i,j) = C(i,j) + temp*A(i,l);
853 for(IndexType j=0; j<n; j++) {
854 for(IndexType i=0; i<m; i++) {
856 for(IndexType l=0; l<k; l++) {
857 temp = temp + A(l,i)*B(l,j);
863 C(i,j) = alpha*temp + beta*C(i,j);
870 if(transA[0] ==
'n' || transA[0] ==
'N') {
872 for(IndexType j=0; j<n; j++) {
874 for(IndexType i=0; i<m; i++) {
878 else if(beta != ONE) {
879 for(IndexType i=0; i<m; i++) {
880 C(i,j) = beta*C(i,j);
883 for(IndexType l=0; l<k; l++) {
884 Scalar temp = alpha*B(j,l);
885 for(IndexType i=0; i<m; i++) {
886 C(i,j) = C(i,j) + temp*A(i,l);
893 for(IndexType j=0; j<n; j++) {
894 for(IndexType i=0; i<m; i++) {
896 for(IndexType l=0; l<k; l++) {
897 temp = temp + A(l,i)*B(j,l);
903 C(i,j) = alpha*temp + beta*C(i,j);
912 template<
class LittleBlockType,
913 class LittleVectorType>
914 KOKKOS_INLINE_FUNCTION
void 915 GETF2 (
const LittleBlockType& A,
const LittleVectorType& ipiv,
int& info)
918 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
919 static_assert (std::is_integral<IndexType>::value,
920 "GETF2: The type of each entry of ipiv must be an integer type.");
921 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
922 static_assert (! std::is_const<Scalar>::value,
923 "GETF2: A must not be a const View (or LittleBlock).");
924 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
925 "GETF2: ipiv must not be a const View (or LittleBlock).");
926 static_assert (LittleBlockType::rank == 2,
"GETF2: A must have rank 2 (be a matrix).");
927 typedef Kokkos::Details::ArithTraits<Scalar> STS;
928 const Scalar
ZERO = STS::zero();
930 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
931 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
932 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
935 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
936 if (pivDim < minPivDim) {
944 for(IndexType j=0; j < pivDim; j++)
948 for(IndexType i=j+1; i<numRows; i++)
950 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
961 for(IndexType i=0; i < numCols; i++)
963 Scalar temp = A(jp,i);
970 for(IndexType i=j+1; i<numRows; i++) {
971 A(i,j) = A(i,j) / A(j,j);
979 for(IndexType r=j+1; r < numRows; r++)
981 for(IndexType c=j+1; c < numCols; c++) {
982 A(r,c) = A(r,c) - A(r,j) * A(j,c);
993 template<
class LittleBlockType,
994 class LittleIntVectorType,
995 class LittleScalarVectorType,
996 const int rank = LittleScalarVectorType::rank>
998 static KOKKOS_INLINE_FUNCTION
void 999 run (
const char mode[],
1000 const LittleBlockType& A,
1001 const LittleIntVectorType& ipiv,
1002 const LittleScalarVectorType& B,
1007 template<
class LittleBlockType,
1008 class LittleIntVectorType,
1009 class LittleScalarVectorType>
1010 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
1011 static KOKKOS_INLINE_FUNCTION
void 1012 run (
const char mode[],
1013 const LittleBlockType& A,
1014 const LittleIntVectorType& ipiv,
1015 const LittleScalarVectorType& B,
1019 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1022 static_assert (std::is_integral<IndexType>::value &&
1023 std::is_signed<IndexType>::value,
1024 "GETRS: The type of each entry of ipiv must be a signed integer.");
1025 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
1026 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
1027 "GETRS: B must not be a const View (or LittleBlock).");
1028 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1029 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1030 static_assert (LittleScalarVectorType::rank == 1,
"GETRS: For this specialization, B must have rank 1.");
1032 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1033 const Scalar
ZERO = STS::zero();
1034 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
1035 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
1036 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
1041 if (numRows != numCols) {
1047 if (pivDim < numRows) {
1053 if(mode[0] ==
'n' || mode[0] ==
'N') {
1055 for(IndexType i=0; i<numRows; i++) {
1056 if(ipiv(i) != i+1) {
1058 B(i) = B(ipiv(i)-1);
1059 B(ipiv(i)-1) = temp;
1064 for(IndexType r=1; r < numRows; r++) {
1065 for(IndexType c=0; c < r; c++) {
1066 B(r) = B(r) - A(r,c)*B(c);
1071 for(IndexType r=numRows-1; r >= 0; r--) {
1073 if(A(r,r) == ZERO) {
1078 for(IndexType c=r+1; c < numCols; c++) {
1079 B(r) = B(r) - A(r,c)*B(c);
1081 B(r) = B(r) / A(r,r);
1085 else if(mode[0] ==
't' || mode[0] ==
'T') {
1090 else if (mode[0] ==
'c' || mode[0] ==
'C') {
1103 template<
class LittleBlockType,
1104 class LittleIntVectorType,
1105 class LittleScalarVectorType>
1106 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1107 static KOKKOS_INLINE_FUNCTION
void 1108 run (
const char mode[],
1109 const LittleBlockType& A,
1110 const LittleIntVectorType& ipiv,
1111 const LittleScalarVectorType& B,
1115 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1116 static_assert (std::is_integral<IndexType>::value,
1117 "GETRS: The type of each entry of ipiv must be an integer type.");
1118 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1119 "GETRS: B must not be a const View (or LittleBlock).");
1120 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1121 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1122 static_assert (LittleScalarVectorType::rank == 2,
"GETRS: For this specialization, B must have rank 2.");
1127 const IndexType numRhs = B.dimension_1 ();
1130 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1131 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1145 template<
class LittleBlockType,
1146 class LittleIntVectorType,
1147 class LittleScalarVectorType>
1148 KOKKOS_INLINE_FUNCTION
void 1150 const LittleBlockType& A,
1151 const LittleIntVectorType& ipiv,
1152 const LittleScalarVectorType& B,
1155 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1156 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1174 template<
class LittleBlockType,
1175 class LittleIntVectorType,
1176 class LittleScalarVectorType>
1177 KOKKOS_INLINE_FUNCTION
void 1179 const LittleIntVectorType& ipiv,
1180 const LittleScalarVectorType& work,
1184 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1187 static_assert (std::is_integral<IndexType>::value &&
1188 std::is_signed<IndexType>::value,
1189 "GETRI: The type of each entry of ipiv must be a signed integer.");
1190 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1191 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1192 "GETRI: A must not be a const View (or LittleBlock).");
1193 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1194 "GETRI: work must not be a const View (or LittleBlock).");
1195 static_assert (LittleBlockType::rank == 2,
"GETRI: A must have rank 2 (be a matrix).");
1196 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1197 const Scalar
ZERO = STS::zero();
1198 const Scalar ONE = STS::one();
1200 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
1201 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
1202 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
1203 const IndexType workDim =
static_cast<IndexType
> (work.dimension_0 ());
1208 if (numRows != numCols) {
1214 if (pivDim < numRows) {
1220 if (workDim < numRows) {
1226 for(IndexType j=0; j < numRows; j++) {
1227 if(A(j,j) ==
ZERO) {
1232 A(j,j) = ONE / A(j,j);
1235 for(IndexType r=0; r < j; r++) {
1236 A(r,j) = A(r,r)*A(r,j);
1237 for(IndexType c=r+1; c < j; c++) {
1238 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1241 for(IndexType r=0; r < j; r++) {
1242 A(r,j) = -A(j,j)*A(r,j);
1247 for(IndexType j = numCols-2; j >= 0; j--) {
1249 for(IndexType r=j+1; r < numRows; r++) {
1254 for(IndexType r=0; r < numRows; r++) {
1255 for(IndexType i=j+1; i < numRows; i++) {
1256 A(r,j) = A(r,j) - work(i)*A(r,i);
1262 for(IndexType j=numRows-1; j >= 0; j--) {
1263 IndexType jp = ipiv(j)-1;
1265 for(IndexType r=0; r < numRows; r++) {
1266 Scalar temp = A(r,j);
1279 template<
class LittleBlockType,
1280 class LittleVectorTypeX,
1281 class LittleVectorTypeY,
1282 class CoefficientType,
1283 class IndexType =
int>
1284 KOKKOS_INLINE_FUNCTION
void 1285 GEMV (
const char trans,
1286 const CoefficientType& alpha,
1287 const LittleBlockType& A,
1288 const LittleVectorTypeX& x,
1289 const CoefficientType& beta,
1290 const LittleVectorTypeY& y)
1296 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1297 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
1298 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
1302 for (IndexType i = 0; i < numRows; ++i) {
1307 for (IndexType i = 0; i < numRows; ++i) {
1308 y_value_type y_i = 0.0;
1309 for (IndexType j = 0; j < numCols; ++j) {
1310 y_i += A(i,j) * x(j);
1319 for (IndexType i = 0; i < numRows; ++i) {
1324 for (IndexType i = 0; i < numRows; ++i) {
1330 for (IndexType i = 0; i < numRows; ++i) {
1331 y_value_type y_i = beta * y(i);
1332 for (IndexType j = 0; j < numCols; ++j) {
1333 y_i += alpha * A(i,j) * x(j);
1346 #endif // TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
Implementation of Tpetra::Experimental::FILL function.
Computes the solution to Ax=b.
Implementation of Tpetra::Experimental::SCAL function.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
Implementation of Tpetra::Experimental::COPY function.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::Experimental::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)