460 auto policy = dataExtentRangePolicy<rank>();
492 const auto & variationTypes = data.getVariationTypes();
493 for (
int d=0; d<
rank; d++)
495 if (variationTypes[d] ==
GENERAL)
505 auto thisAE = constArg;
508 auto & this_underlying = this->getUnderlyingView<1>();
511 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
513 else if (this_full && A_full && B_full)
515 auto thisAE = fullArgs;
519 auto & this_underlying = this->getUnderlyingView<rank>();
523 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
531 auto thisAE = fullArgs;
532 auto & this_underlying = this->getUnderlyingView<rank>();
538 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
542 auto BAE = fullArgsConst;
549 if (B_1D && (get1DArgIndex(B) != -1) )
552 const int argIndex = get1DArgIndex(B);
554 auto & this_underlying = this->getUnderlyingView<1>();
557 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0);
break;
558 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1);
break;
559 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2);
break;
560 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3);
break;
561 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4);
break;
562 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5);
break;
563 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
569 auto thisAE = fullArgsWritable;
570 auto BAE = fullArgsConst;
581 auto thisAE = fullArgs;
582 auto & this_underlying = this->getUnderlyingView<rank>();
588 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
593 auto AAE = fullArgsConst;
600 if (A_1D && (get1DArgIndex(A) != -1) )
603 const int argIndex = get1DArgIndex(A);
605 auto & this_underlying = this->getUnderlyingView<1>();
608 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE);
break;
609 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE);
break;
610 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE);
break;
611 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE);
break;
612 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE);
break;
613 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE);
break;
614 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
620 auto thisAE = fullArgsWritable;
621 auto AAE = fullArgsConst;
628 if (this_1D && (get1DArgIndex(thisData) != -1))
635 const int argThis = get1DArgIndex(thisData);
636 const int argA = get1DArgIndex(A);
637 const int argB = get1DArgIndex(B);
641 auto & this_underlying = this->getUnderlyingView<1>();
642 if ((argA != -1) && (argB != -1))
644#ifdef INTREPID2_HAVE_DEBUG
645 INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error,
"Unexpected 1D arg combination.");
646 INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error,
"Unexpected 1D arg combination.");
650 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0);
break;
651 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1);
break;
652 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2);
break;
653 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3);
break;
654 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4);
break;
655 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5);
break;
656 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
664 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsConst);
break;
665 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsConst);
break;
666 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsConst);
break;
667 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsConst);
break;
668 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsConst);
break;
669 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsConst);
break;
670 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
678 case 0:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsConst, arg0);
break;
679 case 1:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsConst, arg1);
break;
680 case 2:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsConst, arg2);
break;
681 case 3:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsConst, arg3);
break;
682 case 4:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsConst, arg4);
break;
683 case 5:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsConst, arg5);
break;
684 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
691 auto & this_underlying = this->getUnderlyingView<rank>();
692 auto thisAE = fullArgs;
699 if (B_1D && (get1DArgIndex(B) != -1))
701 const int argIndex = get1DArgIndex(B);
705 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0);
break;
706 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1);
break;
707 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2);
break;
708 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3);
break;
709 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4);
break;
710 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5);
break;
711 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
718 auto BAE = fullArgsConst;
724 if (A_1D && (get1DArgIndex(A) != -1))
726 const int argIndex = get1DArgIndex(A);
734 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE);
break;
735 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE);
break;
736 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE);
break;
737 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE);
break;
738 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE);
break;
739 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE);
break;
740 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
745 auto BAE = fullArgsConst;
748 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE);
break;
749 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE);
break;
750 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE);
break;
751 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE);
break;
752 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE);
break;
753 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE);
break;
754 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
761 auto AAE = fullArgsConst;
762 auto BAE = fullArgsConst;
770 auto thisAE = fullArgsWritable;
771 auto AAE = fullArgsConst;
772 auto BAE = fullArgsConst;
1050 Data(std::vector<DimensionInfo> dimInfoVector)
1053 dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({
CONSTANT,
CONSTANT,
CONSTANT,
CONSTANT,
CONSTANT,
CONSTANT,
CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(dimInfoVector.size())
1057 if (dimInfoVector.size() != 0)
1059 std::vector<int> dataExtents;
1061 bool blockPlusDiagonalEncountered =
true;
1062 for (
int d=0; d<rank_; d++)
1064 const DimensionInfo & dimInfo = dimInfoVector[d];
1065 extents_[d] = dimInfo.logicalExtent;
1066 variationType_[d] = dimInfo.variationType;
1067 const bool isBlockPlusDiagonal = (variationType_[d] == BLOCK_PLUS_DIAGONAL);
1068 const bool isSecondBlockPlusDiagonal = isBlockPlusDiagonal && blockPlusDiagonalEncountered;
1069 if (isBlockPlusDiagonal)
1071 blockPlusDiagonalEncountered =
true;
1072 blockPlusDiagonalLastNonDiagonal_ = dimInfo.blockPlusDiagonalLastNonDiagonal;
1074 if ((variationType_[d] != CONSTANT) && (!isSecondBlockPlusDiagonal))
1076 dataExtents.push_back(dimInfo.dataExtent);
1079 if (dataExtents.size() == 0)
1082 dataExtents.push_back(1);
1084 dataRank_ = dataExtents.size();
1087 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>(
"Intrepid2 Data", dataExtents[0]);
break;
1088 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1]);
break;
1089 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2]);
break;
1090 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3]);
break;
1091 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4]);
break;
1092 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5]);
break;
1093 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>(
"Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5], dataExtents[6]);
break;
1094 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid data rank");
1147 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>(
"Intrepid2 Data", view.extent_int(0));
break;
1148 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1));
break;
1149 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2));
break;
1150 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3));
break;
1151 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4));
break;
1152 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5));
break;
1153 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>(
"Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6));
break;
1154 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid data rank");
1160 using MemorySpace =
typename DeviceType::memory_space;
1163 case 1: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView1());
copyContainer(data1_, dataViewMirror);}
break;
1164 case 2: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView2());
copyContainer(data2_, dataViewMirror);}
break;
1165 case 3: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView3());
copyContainer(data3_, dataViewMirror);}
break;
1166 case 4: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView4());
copyContainer(data4_, dataViewMirror);}
break;
1167 case 5: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView5());
copyContainer(data5_, dataViewMirror);}
break;
1168 case 6: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView6());
copyContainer(data6_, dataViewMirror);}
break;
1169 case 7: {
auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.
getUnderlyingView7());
copyContainer(data7_, dataViewMirror);}
break;
1170 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid data rank");
1821 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.
rank() != B_MatData.
rank(), std::invalid_argument,
"AmatData and BmatData have incompatible ranks");
1823 const int D1_DIM = A_MatData.
rank() - 2;
1824 const int D2_DIM = A_MatData.
rank() - 1;
1826 const int A_rows = A_MatData.
extent_int(D1_DIM);
1827 const int A_cols = A_MatData.
extent_int(D2_DIM);
1828 const int B_rows = B_MatData.
extent_int(D1_DIM);
1829 const int B_cols = B_MatData.
extent_int(D2_DIM);
1831 const int leftRows = transposeA ? A_cols : A_rows;
1832 const int leftCols = transposeA ? A_rows : A_cols;
1833 const int rightRows = transposeB ? B_cols : B_rows;
1834 const int rightCols = transposeB ? B_rows : B_cols;
1836 INTREPID2_TEST_FOR_EXCEPTION(leftCols != rightRows, std::invalid_argument,
"incompatible matrix dimensions");
1838 Kokkos::Array<int,7> resultExtents;
1839 Kokkos::Array<DataVariationType,7> resultVariationTypes;
1841 resultExtents[D1_DIM] = leftRows;
1842 resultExtents[D2_DIM] = rightCols;
1843 int resultBlockPlusDiagonalLastNonDiagonal = -1;
1852 const int resultRank = A_MatData.
rank();
1857 Kokkos::Array<int,7> resultActiveDims;
1858 Kokkos::Array<int,7> resultDataDims;
1859 int resultNumActiveDims = 0;
1861 for (
int i=0; i<resultRank-2; i++)
1863 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.
extent_int(i) != B_MatData.
extent_int(i), std::invalid_argument,
"A and B extents must match in each non-matrix dimension");
1871 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_VariationType ==
BLOCK_PLUS_DIAGONAL, std::invalid_argument,
"unsupported variationType");
1872 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(B_VariationType ==
BLOCK_PLUS_DIAGONAL, std::invalid_argument,
"unsupported variationType");
1876 if ((A_VariationType ==
GENERAL) || (B_VariationType ==
GENERAL))
1878 resultVariationType =
GENERAL;
1879 dataSize = resultExtents[i];
1886 else if ((B_VariationType ==
MODULAR) && (A_VariationType ==
CONSTANT))
1888 resultVariationType =
MODULAR;
1891 else if ((B_VariationType ==
CONSTANT) && (A_VariationType ==
MODULAR))
1893 resultVariationType =
MODULAR;
1901 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_Modulus != B_Modulus, std::invalid_argument,
"If both matrices have variation type MODULAR, they must agree on the modulus");
1902 resultVariationType =
MODULAR;
1903 dataSize = A_Modulus;
1905 resultVariationTypes[i] = resultVariationType;
1907 if (resultVariationType !=
CONSTANT)
1909 resultActiveDims[resultNumActiveDims] = i;
1910 resultDataDims[resultNumActiveDims] = dataSize;
1911 resultNumActiveDims++;
1916 resultExtents[D1_DIM] = leftRows;
1917 resultExtents[D2_DIM] = rightCols;
1926 resultActiveDims[resultNumActiveDims] = resultRank - 2;
1928 const int numDiagonalEntries = leftRows - (resultBlockPlusDiagonalLastNonDiagonal + 1);
1929 const int numNondiagonalEntries = (resultBlockPlusDiagonalLastNonDiagonal + 1) * (resultBlockPlusDiagonalLastNonDiagonal + 1);
1931 resultDataDims[resultNumActiveDims] = numDiagonalEntries + numNondiagonalEntries;
1932 resultNumActiveDims++;
1937 resultVariationTypes[D1_DIM] =
GENERAL;
1938 resultVariationTypes[D2_DIM] =
GENERAL;
1940 resultActiveDims[resultNumActiveDims] = resultRank - 2;
1941 resultActiveDims[resultNumActiveDims+1] = resultRank - 1;
1943 resultDataDims[resultNumActiveDims] = leftRows;
1944 resultDataDims[resultNumActiveDims+1] = rightCols;
1945 resultNumActiveDims += 2;
1948 for (
int i=resultRank; i<7; i++)
1950 resultVariationTypes[i] =
CONSTANT;
1951 resultExtents[i] = 1;
1954 ScalarView<DataScalar,DeviceType> data;
1955 if (resultNumActiveDims == 1)
1960 else if (resultNumActiveDims == 2)
1965 else if (resultNumActiveDims == 3)
1968 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2]);
1970 else if (resultNumActiveDims == 4)
1973 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1976 else if (resultNumActiveDims == 5)
1979 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1980 resultDataDims[3], resultDataDims[4]);
1982 else if (resultNumActiveDims == 6)
1985 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1986 resultDataDims[3], resultDataDims[4], resultDataDims[5]);
1991 data =
getMatchingViewWithLabel(viewToMatch,
"Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1992 resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2003 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matData.
rank() != vecData.
rank() + 1, std::invalid_argument,
"matData and vecData have incompatible ranks");
2008 const int resultRank = vecData.
rank();
2010 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matCols != vecDim, std::invalid_argument,
"matData column count != vecData dimension");
2012 Kokkos::Array<int,7> resultExtents;
2013 Kokkos::Array<DataVariationType,7> resultVariationTypes;
2017 Kokkos::Array<int,7> resultActiveDims;
2018 Kokkos::Array<int,7> resultDataDims;
2019 int resultNumActiveDims = 0;
2021 for (
int i=0; i<resultRank-1; i++)
2024 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecData.
extent_int(i) != matData.
extent_int(i), std::invalid_argument,
"matData and vecData extents must match in each non-matrix/vector dimension");
2030 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecVariationType ==
BLOCK_PLUS_DIAGONAL, std::invalid_argument,
"unsupported variationType");
2031 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matVariationType ==
BLOCK_PLUS_DIAGONAL, std::invalid_argument,
"unsupported variationType");
2035 if ((vecVariationType ==
GENERAL) || (matVariationType ==
GENERAL))
2037 resultVariationType =
GENERAL;
2038 dataSize = resultExtents[i];
2045 else if ((matVariationType ==
MODULAR) && (vecVariationType ==
CONSTANT))
2047 resultVariationType =
MODULAR;
2050 else if ((matVariationType ==
CONSTANT) && (vecVariationType ==
MODULAR))
2052 resultVariationType =
MODULAR;
2060 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matModulus != vecModulus, std::invalid_argument,
"If matrix and vector both have variation type MODULAR, they must agree on the modulus");
2061 resultVariationType =
MODULAR;
2062 dataSize = matModulus;
2064 resultVariationTypes[i] = resultVariationType;
2066 if (resultVariationType !=
CONSTANT)
2068 resultActiveDims[resultNumActiveDims] = i;
2069 resultDataDims[resultNumActiveDims] = dataSize;
2070 resultNumActiveDims++;
2075 resultVariationTypes[resultNumActiveDims] =
GENERAL;
2076 resultActiveDims[resultNumActiveDims] = resultRank - 1;
2077 resultDataDims[resultNumActiveDims] = matRows;
2078 resultExtents[resultRank-1] = matRows;
2079 resultNumActiveDims++;
2081 for (
int i=resultRank; i<7; i++)
2083 resultVariationTypes[i] =
CONSTANT;
2084 resultExtents[i] = 1;
2087 ScalarView<DataScalar,DeviceType> data;
2088 if (resultNumActiveDims == 1)
2092 else if (resultNumActiveDims == 2)
2096 else if (resultNumActiveDims == 3)
2100 else if (resultNumActiveDims == 4)
2105 else if (resultNumActiveDims == 5)
2108 resultDataDims[3], resultDataDims[4]);
2110 else if (resultNumActiveDims == 6)
2113 resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2118 resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2337 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.
rank() != B_MatData.
rank(), std::invalid_argument,
"AmatData and BmatData have incompatible ranks");
2339 const int D1_DIM = A_MatData.
rank() - 2;
2340 const int D2_DIM = A_MatData.
rank() - 1;
2342 const int A_rows = A_MatData.
extent_int(D1_DIM);
2343 const int A_cols = A_MatData.
extent_int(D2_DIM);
2344 const int B_rows = B_MatData.
extent_int(D1_DIM);
2345 const int B_cols = B_MatData.
extent_int(D2_DIM);
2347 const int leftRows = transposeA ? A_cols : A_rows;
2348 const int leftCols = transposeA ? A_rows : A_cols;
2349 const int rightCols = transposeB ? B_rows : B_cols;
2351#ifdef INTREPID2_HAVE_DEBUG
2352 const int rightRows = transposeB ? B_cols : B_rows;
2353 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(leftCols != rightRows, std::invalid_argument,
"inner dimensions do not match");
2359 using ExecutionSpace =
typename DeviceType::execution_space;
2361 const int diagonalStart = (variationType_[D1_DIM] ==
BLOCK_PLUS_DIAGONAL) ? blockPlusDiagonalLastNonDiagonal_ + 1 : leftRows;
2366 auto policy = Kokkos::RangePolicy<ExecutionSpace>(0,
getDataExtent(0));
2367 Kokkos::parallel_for(
"compute mat-mat", policy,
2368 KOKKOS_LAMBDA (
const int &matrixOrdinal) {
2369 for (
int i=0; i<diagonalStart; i++)
2371 for (
int j=0; j<rightCols; j++)
2375 for (
int k=0; k<leftCols; k++)
2377 const auto & left = transposeA ? A_MatData(matrixOrdinal,k,i) : A_MatData(matrixOrdinal,i,k);
2378 const auto & right = transposeB ? B_MatData(matrixOrdinal,j,k) : B_MatData(matrixOrdinal,k,j);
2379 val_ij += left * right;
2383 for (
int i=diagonalStart; i<leftRows; i++)
2386 const auto & left = A_MatData(matrixOrdinal,i,i);
2387 const auto & right = B_MatData(matrixOrdinal,i,i);
2388 val_ii = left * right;
2392 else if (rank_ == 4)
2396 if (underlyingMatchesLogical_)
2398 Kokkos::parallel_for(
"compute mat-mat", policy,
2399 KOKKOS_LAMBDA (
const int &cellOrdinal,
const int &pointOrdinal) {
2400 for (
int i=0; i<leftCols; i++)
2402 for (
int j=0; j<rightCols; j++)
2406 for (
int k=0; k<leftCols; k++)
2408 const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2409 const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2410 val_ij += left * right;
2418 Kokkos::parallel_for(
"compute mat-mat", policy,
2419 KOKKOS_LAMBDA (
const int &cellOrdinal,
const int &pointOrdinal) {
2420 for (
int i=0; i<diagonalStart; i++)
2422 for (
int j=0; j<rightCols; j++)
2426 for (
int k=0; k<leftCols; k++)
2428 const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2429 const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2430 val_ij += left * right;
2434 for (
int i=diagonalStart; i<leftRows; i++)
2437 const auto & left = A_MatData(cellOrdinal,pointOrdinal,i,i);
2438 const auto & right = B_MatData(cellOrdinal,pointOrdinal,i,i);
2439 val_ii = left * right;
2447 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(
true, std::logic_error,
"rank not yet supported");