// Execution space of DeviceType and the scratch memory space that belongs to it.
71 using ExecutionSpace =
typename DeviceType::execution_space;
72 using ScratchSpace =
typename ExecutionSpace::scratch_memory_space;
// Unmanaged rank-1 and rank-2 views living in ScratchSpace; operator() constructs them
// from teamMember.team_shmem() and uses them as work arrays for polynomial values.
73 using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
74 using OutputScratchView2D = Kokkos::View<OutputScalar**,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
75 using PointScratchView = Kokkos::View<PointScalar*, ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
// Team policy on ExecutionSpace and the team-member handle type received by operator().
77 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
78 using TeamMember =
typename TeamPolicy::member_type;
// Result container: indexed (field, point) when values are written and
// (field, point, component) when gradients are written.
82 OutputFieldType output_;
// Evaluation points: indexed (point, d) with d = 0,1,2 read as x, y, z.
83 InputPointsType inputPoints_;
// When false, field 0 is replaced by the constant 1.0 (value) / zero vector (gradient)
// instead of the first vertex function.
86 bool defineVertexFunctions_;
// Set in the constructor from extents 0 and 1 of the output container.
87 int numFields_, numPoints_;
// When positive, passed as an extra trailing extent to every scratch view.
// NOTE(review): presumably the derivative (Fad) dimension of OutputScalar -- its initialisation is not visible here.
89 size_t fad_size_output_;
// Vertex and edge counts of the cell handled by this functor.
91 static const int numVertices = 4;
92 static const int numEdges = 6;
// For each edge ordinal, the vertex ordinals of its two endpoints.
94 const int edge_start_[numEdges] = {0,1,0,0,1,2};
95 const int edge_end_[numEdges] = {1,2,2,3,3,3};
// For each face ordinal, the three vertex ordinals spanning the face.
97 static const int numFaces = 4;
98 const int face_vertex_0[numFaces] = {0,0,1,0};
99 const int face_vertex_1[numFaces] = {1,1,2,2};
100 const int face_vertex_2[numFaces] = {2,3,3,3};
// For each face, the ordinal of the edge (face_vertex_0, face_vertex_1); that edge's
// functions are the ones multiplied into the face functions in operator().
104 const int face_ordinal_of_first_edge[numFaces] = {0,0,1,2};
// Constructor (only the tail of its parameter list is visible in this excerpt).
// Copies the arguments into the corresponding members, derives the field and point
// counts from the output container, and validates them against the input points and
// against the expected number of basis functions for polyOrder.
107 int polyOrder,
bool defineVertexFunctions)
108 : opType_(opType), output_(output), inputPoints_(inputPoints),
109 polyOrder_(polyOrder), defineVertexFunctions_(defineVertexFunctions),
// Extent 0 of the output is the field dimension, extent 1 the point dimension.
112 numFields_ = output.extent_int(0);
113 numPoints_ = output.extent_int(1);
// The output must have one point slot per row of inputPoints; a mismatch is reported
// as std::invalid_argument through INTREPID2_TEST_FOR_EXCEPTION.
114 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != inputPoints.extent_int(0), std::invalid_argument,
"point counts need to match!");
// The field count must equal (p+1)(p+2)(p+3)/6 with p = polyOrder_.
115 INTREPID2_TEST_FOR_EXCEPTION(numFields_ != (polyOrder_+1)*(polyOrder_+2)*(polyOrder_+3)/6, std::invalid_argument,
"output field size does not match basis cardinality");
// Team-level kernel: evaluates all basis functions (or their gradients) at one point.
// The point handled is selected by teamMember.league_rank(). Results are written to
// output_ in the field order: vertex functions, edge functions, face functions,
// interior functions. Later stages read back entries of output_ written by earlier
// stages, so the order of the stages matters.
// NOTE(review): the visible lines look like the bodies of a switch on the operator
// type (value case, gradient case, unsupported cases); the case labels and braces are
// not visible in this excerpt.
118 KOKKOS_INLINE_FUNCTION
119 void operator()(
const TeamMember & teamMember )
const
// Per-face and interior function counts for p = polyOrder_:
// (p-2)(p-1)/2 per face, (p-3)(p-2)(p-1)/6 in the interior.
121 const int numFaceBasisFunctionsPerFace = (polyOrder_-2) * (polyOrder_-1) / 2;
122 const int numInteriorBasisFunctions = (polyOrder_-3) * (polyOrder_-2) * (polyOrder_-1) / 6;
// One point per league rank.
124 auto pointOrdinal = teamMember.league_rank();
// Work arrays: two 1D arrays of length p+1 and three 2D arrays of
// numAlphaValues x (p+1), all carved out of the team scratch handle.
125 OutputScratchView legendre_values1_at_point, legendre_values2_at_point;
126 OutputScratchView2D jacobi_values1_at_point, jacobi_values2_at_point, jacobi_values3_at_point;
// max(p-1, 1): at least one row even for low polyOrder_.
127 const int numAlphaValues = (polyOrder_-1 > 1) ? (polyOrder_-1) : 1;
// With a positive fad_size_output_, it is passed as an additional trailing extent.
128 if (fad_size_output_ > 0) {
129 legendre_values1_at_point = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
130 legendre_values2_at_point = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
131 jacobi_values1_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
132 jacobi_values2_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
133 jacobi_values3_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
// Same five views, constructed without the trailing fad_size_output_ extent.
136 legendre_values1_at_point = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
137 legendre_values2_at_point = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
138 jacobi_values1_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
139 jacobi_values2_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
140 jacobi_values3_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
// Coordinates of the point handled by this team.
143 const auto & x = inputPoints_(pointOrdinal,0);
144 const auto & y = inputPoints_(pointOrdinal,1);
145 const auto & z = inputPoints_(pointOrdinal,2);
// The four affine coordinate functions 1-x-y-z, x, y, z (one per vertex) and their
// constant partial derivatives with respect to x, y and z.
148 const PointScalar lambda[numVertices] = {1. - x - y - z, x, y, z};
149 const PointScalar lambda_dx[numVertices] = {-1., 1., 0., 0.};
150 const PointScalar lambda_dy[numVertices] = {-1., 0., 1., 0.};
151 const PointScalar lambda_dz[numVertices] = {-1., 0., 0., 1.};
// Number of functions associated with each edge: p-1.
153 const int num1DEdgeFunctions = polyOrder_ - 1;
// ---- Values: output_ is written as (field, point) from here on ----
// Vertex functions: field v is lambda[v].
160 for (
int vertexOrdinal=0; vertexOrdinal<numVertices; vertexOrdinal++)
162 output_(vertexOrdinal,pointOrdinal) = lambda[vertexOrdinal];
// Without vertex functions, field 0 becomes the constant 1.0.
164 if (!defineVertexFunctions_)
168 output_(0,pointOrdinal) = 1.0;
// Edge functions follow the numVertices vertex functions.
172 int fieldOrdinalOffset = numVertices;
173 for (
int edgeOrdinal=0; edgeOrdinal<numEdges; edgeOrdinal++)
// Affine coordinates of the edge's start and end vertices.
175 const auto & s0 = lambda[edge_start_[edgeOrdinal]];
176 const auto & s1 = lambda[ edge_end_[edgeOrdinal]];
// Fill legendre_values1_at_point up to order p with arguments (s1, s0+s1).
178 Polynomials::shiftedScaledIntegratedLegendreValues(legendre_values1_at_point, polyOrder_, PointScalar(s1), PointScalar(s0+s1));
179 for (
int edgeFunctionOrdinal=0; edgeFunctionOrdinal<num1DEdgeFunctions; edgeFunctionOrdinal++)
// Entries 0 and 1 of the polynomial array are skipped; edge function k uses entry k+2.
182 output_(edgeFunctionOrdinal+fieldOrdinalOffset,pointOrdinal) = legendre_values1_at_point(edgeFunctionOrdinal+2);
184 fieldOrdinalOffset += num1DEdgeFunctions;
// Face functions: product of an edge function of the face's first edge with an
// integrated Jacobi value in the third face coordinate.
190 for (
int faceOrdinal=0; faceOrdinal<numFaces; faceOrdinal++)
192 const auto & s0 = lambda[face_vertex_0[faceOrdinal]];
193 const auto & s1 = lambda[face_vertex_1[faceOrdinal]];
194 const auto & s2 = lambda[face_vertex_2[faceOrdinal]];
195 const PointScalar jacobiScaling = s0 + s1 + s2;
// Row n-2 of jacobi_values1_at_point holds the integrated Jacobi values for alpha = 2n.
198 for (
int n=2; n<=polyOrder_; n++)
200 const double alpha = n*2;
201 const int alphaOrdinal = n-2;
202 using Kokkos::subview;
204 auto jacobi_alpha = subview(jacobi_values1_at_point, alphaOrdinal, ALL);
205 Polynomials::integratedJacobiValues(jacobi_alpha, alpha, polyOrder_-2, s2, jacobiScaling);
208 const int edgeOrdinal = face_ordinal_of_first_edge[faceOrdinal];
209 int localFaceBasisOrdinal = 0;
// Enumerate (i, j) with i >= 2, j >= 1, grouped by increasing total order i + j.
210 for (
int totalPolyOrder=3; totalPolyOrder<=polyOrder_; totalPolyOrder++)
212 for (
int i=2; i<totalPolyOrder; i++)
// Read back the value of edge function i on the face's first edge from output_.
214 const int edgeBasisOrdinal = edgeOrdinal*num1DEdgeFunctions + i-2 + numVertices;
215 const auto & edgeValue = output_(edgeBasisOrdinal,pointOrdinal);
216 const int alphaOrdinal = i-2;
218 const int j = totalPolyOrder - i;
219 const auto & jacobiValue = jacobi_values1_at_point(alphaOrdinal,j);
220 const int fieldOrdinal = fieldOrdinalOffset + localFaceBasisOrdinal;
221 output_(fieldOrdinal,pointOrdinal) = edgeValue * jacobiValue;
223 localFaceBasisOrdinal++;
226 fieldOrdinalOffset += numFaceBasisFunctionsPerFace;
// Interior functions: jacobi_values1_at_point is refilled here (row n-3, alpha = 2n,
// argument lambda[3], scaling 1.0), overwriting the face-stage contents.
230 for (
int n=3; n<=polyOrder_; n++)
232 const double alpha = n*2;
233 const double jacobiScaling = 1.0;
234 const int alphaOrdinal = n-3;
235 using Kokkos::subview;
237 auto jacobi_alpha = subview(jacobi_values1_at_point, alphaOrdinal, ALL);
238 Polynomials::integratedJacobiValues(jacobi_alpha, alpha, polyOrder_-3, lambda[3], jacobiScaling);
// NOTE(review): min_i, min_j and min_k are defined on lines not visible in this excerpt.
243 const int min_ij = min_i + min_j;
244 const int min_ijk = min_ij + min_k;
245 int localInteriorBasisOrdinal = 0;
// Enumerate (i, j, k) grouped by total order i + j + k, then by i + j.
246 for (
int totalPolyOrder_ijk=min_ijk; totalPolyOrder_ijk <= polyOrder_; totalPolyOrder_ijk++)
// Restarted for every total order so that it walks the first face's functions from the beginning.
248 int localFaceBasisOrdinal = 0;
249 for (
int totalPolyOrder_ij=min_ij; totalPolyOrder_ij <= totalPolyOrder_ijk-min_j; totalPolyOrder_ij++)
251 for (
int i=2; i <= totalPolyOrder_ij-min_j; i++)
253 const int j = totalPolyOrder_ij - i;
254 const int k = totalPolyOrder_ijk - totalPolyOrder_ij;
// Face functions start right after the vertex and edge functions; the value of the
// matching function on the first face is read back from output_.
255 const int faceBasisOrdinal = numEdges*num1DEdgeFunctions + numVertices + localFaceBasisOrdinal;
256 const auto & faceValue = output_(faceBasisOrdinal,pointOrdinal);
257 const int alphaOrdinal = (i+j)-3;
258 localFaceBasisOrdinal++;
260 const int fieldOrdinal = fieldOrdinalOffset + localInteriorBasisOrdinal;
261 const auto & jacobiValue = jacobi_values1_at_point(alphaOrdinal,k);
262 output_(fieldOrdinal,pointOrdinal) = faceValue * jacobiValue;
263 localInteriorBasisOrdinal++;
267 fieldOrdinalOffset += numInteriorBasisFunctions;
// ---- Gradients: output_ is written as (field, point, component) from here on,
// ---- with components 0,1,2 matching the lambda_dx, lambda_dy, lambda_dz tables.
// Field 0: gradient of lambda[0] when vertex functions are defined ...
274 if (defineVertexFunctions_)
278 output_(0,pointOrdinal,0) = -1.0;
279 output_(0,pointOrdinal,1) = -1.0;
280 output_(0,pointOrdinal,2) = -1.0;
// ... otherwise the zero vector (gradient of the constant 1.0 used in the value case).
286 output_(0,pointOrdinal,0) = 0.0;
287 output_(0,pointOrdinal,1) = 0.0;
288 output_(0,pointOrdinal,2) = 0.0;
// Fields 1..3: gradients of x, y and z.
291 output_(1,pointOrdinal,0) = 1.0;
292 output_(1,pointOrdinal,1) = 0.0;
293 output_(1,pointOrdinal,2) = 0.0;
295 output_(2,pointOrdinal,0) = 0.0;
296 output_(2,pointOrdinal,1) = 1.0;
297 output_(2,pointOrdinal,2) = 0.0;
299 output_(3,pointOrdinal,0) = 0.0;
300 output_(3,pointOrdinal,1) = 0.0;
301 output_(3,pointOrdinal,2) = 1.0;
// Edge gradients follow the vertex gradients.
304 int fieldOrdinalOffset = numVertices;
// Aliases naming the role each scratch array plays in the edge-gradient stage.
316 auto & P_i_minus_1 = legendre_values1_at_point;
317 auto & L_i_dt = legendre_values2_at_point;
318 for (
int edgeOrdinal=0; edgeOrdinal<numEdges; edgeOrdinal++)
320 const auto & s0 = lambda[edge_start_[edgeOrdinal]];
321 const auto & s1 = lambda[ edge_end_[edgeOrdinal]];
// Partial derivatives of the two edge coordinates.
323 const auto & s0_dx = lambda_dx[edge_start_[edgeOrdinal]];
324 const auto & s0_dy = lambda_dy[edge_start_[edgeOrdinal]];
325 const auto & s0_dz = lambda_dz[edge_start_[edgeOrdinal]];
326 const auto & s1_dx = lambda_dx[ edge_end_[edgeOrdinal]];
327 const auto & s1_dy = lambda_dy[ edge_end_[edgeOrdinal]];
328 const auto & s1_dz = lambda_dz[ edge_end_[edgeOrdinal]];
// Both arrays are evaluated with arguments (s1, s0+s1); P goes up to order p-1, L_dt up to p.
330 Polynomials::shiftedScaledLegendreValues (P_i_minus_1, polyOrder_-1, PointScalar(s1), PointScalar(s0+s1));
331 Polynomials::shiftedScaledIntegratedLegendreValues_dt(L_i_dt, polyOrder_, PointScalar(s1), PointScalar(s0+s1));
332 for (
int edgeFunctionOrdinal=0; edgeFunctionOrdinal<num1DEdgeFunctions; edgeFunctionOrdinal++)
// Gradient of edge function i: P_{i-1} * grad(s1) + L_i_dt * grad(s0 + s1).
335 const int i = edgeFunctionOrdinal+2;
336 output_(edgeFunctionOrdinal+fieldOrdinalOffset,pointOrdinal,0) = P_i_minus_1(i-1) * s1_dx + L_i_dt(i) * (s1_dx + s0_dx);
337 output_(edgeFunctionOrdinal+fieldOrdinalOffset,pointOrdinal,1) = P_i_minus_1(i-1) * s1_dy + L_i_dt(i) * (s1_dy + s0_dy);
338 output_(edgeFunctionOrdinal+fieldOrdinalOffset,pointOrdinal,2) = P_i_minus_1(i-1) * s1_dz + L_i_dt(i) * (s1_dz + s0_dz);
340 fieldOrdinalOffset += num1DEdgeFunctions;
// Aliases naming the role each scratch array plays in the face-gradient stage.
361 auto & L_i = legendre_values2_at_point;
362 auto & L_2i_j_dt = jacobi_values1_at_point;
363 auto & L_2i_j = jacobi_values2_at_point;
364 auto & P_2i_j_minus_1 = jacobi_values3_at_point;
366 for (
int faceOrdinal=0; faceOrdinal<numFaces; faceOrdinal++)
368 const auto & s0 = lambda[face_vertex_0[faceOrdinal]];
369 const auto & s1 = lambda[face_vertex_1[faceOrdinal]];
370 const auto & s2 = lambda[face_vertex_2[faceOrdinal]];
// Edge-function values are recomputed here because output_ now holds gradients.
371 Polynomials::shiftedScaledIntegratedLegendreValues(L_i, polyOrder_, s1, s0+s1);
373 const PointScalar jacobiScaling = s0 + s1 + s2;
// For each alpha = 2n, row n-2 of the three 2D arrays is filled with, respectively,
// the _dt values, the integrated Jacobi values, and the shifted-scaled Jacobi values.
376 for (
int n=2; n<=polyOrder_; n++)
378 const double alpha = n*2;
379 const int alphaOrdinal = n-2;
380 using Kokkos::subview;
382 auto L_2i_j_dt_alpha = subview(L_2i_j_dt, alphaOrdinal, ALL);
383 auto L_2i_j_alpha = subview(L_2i_j, alphaOrdinal, ALL);
384 auto P_2i_j_minus_1_alpha = subview(P_2i_j_minus_1, alphaOrdinal, ALL);
385 Polynomials::integratedJacobiValues_dt(L_2i_j_dt_alpha, alpha, polyOrder_-2, s2, jacobiScaling);
386 Polynomials::integratedJacobiValues (L_2i_j_alpha, alpha, polyOrder_-2, s2, jacobiScaling);
387 Polynomials::shiftedScaledJacobiValues(P_2i_j_minus_1_alpha, alpha, polyOrder_-1, s2, jacobiScaling);
390 const int edgeOrdinal = face_ordinal_of_first_edge[faceOrdinal];
391 int localFaceOrdinal = 0;
// Same (i, j) enumeration as in the value case.
392 for (
int totalPolyOrder=3; totalPolyOrder<=polyOrder_; totalPolyOrder++)
394 for (
int i=2; i<totalPolyOrder; i++)
// Gradient of edge function i on the face's first edge, read back from output_.
396 const int edgeBasisOrdinal = edgeOrdinal*num1DEdgeFunctions + i-2 + numVertices;
397 const auto & grad_L_i_dx = output_(edgeBasisOrdinal,pointOrdinal,0);
398 const auto & grad_L_i_dy = output_(edgeBasisOrdinal,pointOrdinal,1);
399 const auto & grad_L_i_dz = output_(edgeBasisOrdinal,pointOrdinal,2);
401 const int alphaOrdinal = i-2;
// Partial derivatives of the three face coordinates.
403 const auto & s0_dx = lambda_dx[face_vertex_0[faceOrdinal]];
404 const auto & s0_dy = lambda_dy[face_vertex_0[faceOrdinal]];
405 const auto & s0_dz = lambda_dz[face_vertex_0[faceOrdinal]];
406 const auto & s1_dx = lambda_dx[face_vertex_1[faceOrdinal]];
407 const auto & s1_dy = lambda_dy[face_vertex_1[faceOrdinal]];
408 const auto & s1_dz = lambda_dz[face_vertex_1[faceOrdinal]];
409 const auto & s2_dx = lambda_dx[face_vertex_2[faceOrdinal]];
410 const auto & s2_dy = lambda_dy[face_vertex_2[faceOrdinal]];
411 const auto & s2_dz = lambda_dz[face_vertex_2[faceOrdinal]];
413 int j = totalPolyOrder - i;
// NOTE(review): l_i, used below, is defined on a line not visible in this excerpt
// (presumably L_i(i) -- confirm against the full file).
416 auto & l_2i_j = L_2i_j(alphaOrdinal,j);
418 auto & l_2i_j_dt = L_2i_j_dt(alphaOrdinal,j);
419 auto & p_2i_j_minus_1 = P_2i_j_minus_1(alphaOrdinal,j-1);
// Product rule: l_2i_j * grad(L_i) + l_i * grad(l_2i_j), with grad(l_2i_j) expanded as
// p_2i_j_minus_1 * grad(s2) + l_2i_j_dt * grad(s0 + s1 + s2).
421 const OutputScalar basisValue_dx = l_2i_j * grad_L_i_dx + l_i * (p_2i_j_minus_1 * s2_dx + l_2i_j_dt * (s0_dx + s1_dx + s2_dx));
422 const OutputScalar basisValue_dy = l_2i_j * grad_L_i_dy + l_i * (p_2i_j_minus_1 * s2_dy + l_2i_j_dt * (s0_dy + s1_dy + s2_dy));
423 const OutputScalar basisValue_dz = l_2i_j * grad_L_i_dz + l_i * (p_2i_j_minus_1 * s2_dz + l_2i_j_dt * (s0_dz + s1_dz + s2_dz));
425 const int fieldOrdinal = fieldOrdinalOffset + localFaceOrdinal;
427 output_(fieldOrdinal,pointOrdinal,0) = basisValue_dx;
428 output_(fieldOrdinal,pointOrdinal,1) = basisValue_dy;
429 output_(fieldOrdinal,pointOrdinal,2) = basisValue_dz;
434 fieldOrdinalOffset += numFaceBasisFunctionsPerFace;
// Interior gradients. Scratch roles: jacobi_values1 -> L_alpha, jacobi_values2 -> P_alpha,
// jacobi_values3 and legendre_values1 -> ingredients of the first face's function values.
453 auto & L_alpha = jacobi_values1_at_point;
454 auto & P_alpha = jacobi_values2_at_point;
// Coordinates of vertices 0, 1, 2 (the vertices of face 0 per the face_vertex_* tables).
458 const auto & s0 = lambda[0];
459 const auto & s1 = lambda[1];
460 const auto & s2 = lambda[2];
462 Polynomials::shiftedScaledIntegratedLegendreValues(legendre_values1_at_point, polyOrder_, PointScalar(s1), PointScalar(s0+s1));
465 const PointScalar jacobiScaling = s0 + s1 + s2;
// Row n-2 of jacobi_values3_at_point: integrated Jacobi values for alpha = 2n in s2.
466 for (
int n=2; n<=polyOrder_; n++)
468 const double alpha = n*2;
469 const int alphaOrdinal = n-2;
470 using Kokkos::subview;
472 auto jacobi_alpha = subview(jacobi_values3_at_point, alphaOrdinal, ALL);
473 Polynomials::integratedJacobiValues(jacobi_alpha, alpha, polyOrder_-2, s2, jacobiScaling);
// Row n-3 of L_alpha / P_alpha: integrated and shifted-scaled Jacobi values for
// alpha = 2n in lambda[3], with scaling 1.0.
478 for (
int n=3; n<=polyOrder_; n++)
480 const double alpha = n*2;
481 const double jacobiScaling = 1.0;
482 const int alphaOrdinal = n-3;
483 using Kokkos::subview;
487 auto L = subview(L_alpha, alphaOrdinal, ALL);
488 auto P = subview(P_alpha, alphaOrdinal, ALL);
489 Polynomials::integratedJacobiValues (L, alpha, polyOrder_-3, lambda[3], jacobiScaling);
490 Polynomials::shiftedScaledJacobiValues(P, alpha, polyOrder_-3, lambda[3], jacobiScaling);
// NOTE(review): min_i, min_j and min_k are defined on lines not visible in this excerpt.
496 const int min_ij = min_i + min_j;
497 const int min_ijk = min_ij + min_k;
498 int localInteriorBasisOrdinal = 0;
// Same (i, j, k) enumeration as in the value case.
499 for (
int totalPolyOrder_ijk=min_ijk; totalPolyOrder_ijk <= polyOrder_; totalPolyOrder_ijk++)
501 int localFaceBasisOrdinal = 0;
502 for (
int totalPolyOrder_ij=min_ij; totalPolyOrder_ij <= totalPolyOrder_ijk-min_j; totalPolyOrder_ij++)
504 for (
int i=2; i <= totalPolyOrder_ij-min_j; i++)
506 const int j = totalPolyOrder_ij - i;
507 const int k = totalPolyOrder_ijk - totalPolyOrder_ij;
// Gradient of the matching function on the first face, read back from output_.
509 const int faceBasisOrdinal = numEdges*num1DEdgeFunctions + numVertices + localFaceBasisOrdinal;
511 const auto & faceValue_dx = output_(faceBasisOrdinal,pointOrdinal,0);
512 const auto & faceValue_dy = output_(faceBasisOrdinal,pointOrdinal,1);
513 const auto & faceValue_dz = output_(faceBasisOrdinal,pointOrdinal,2);
// The face function's value is not stored in output_ in this case, so it is rebuilt
// as edge value times integrated Jacobi value (same product as in the value case).
516 OutputScalar faceValue;
518 const auto & edgeValue = legendre_values1_at_point(i);
519 const int alphaOrdinal = i-2;
520 const auto & jacobiValue = jacobi_values3_at_point(alphaOrdinal,j);
521 faceValue = edgeValue * jacobiValue;
523 localFaceBasisOrdinal++;
525 const int alphaOrdinal = (i+j)-3;
527 const int fieldOrdinal = fieldOrdinalOffset + localInteriorBasisOrdinal;
528 const auto & integratedJacobiValue = L_alpha(alphaOrdinal,k);
529 const auto & jacobiValue = P_alpha(alphaOrdinal,k-1);
// Product rule: L_alpha * grad(face function) + face value * P_alpha * grad(lambda[3]).
530 output_(fieldOrdinal,pointOrdinal,0) = integratedJacobiValue * faceValue_dx + faceValue * jacobiValue * lambda_dx[3];
531 output_(fieldOrdinal,pointOrdinal,1) = integratedJacobiValue * faceValue_dy + faceValue * jacobiValue * lambda_dy[3];
532 output_(fieldOrdinal,pointOrdinal,2) = integratedJacobiValue * faceValue_dz + faceValue * jacobiValue * lambda_dz[3];
534 localInteriorBasisOrdinal++;
538 fieldOrdinalOffset += numInteriorBasisFunctions;
// Unconditional abort (the condition is the literal true) with a message about
// unsupported higher-order derivatives.
// NOTE(review): the message names Basis_HGRAD_TET_Cn_FEM_ORTH::OrthPolynomialTri;
// confirm that this matches the enclosing class, it may be a copy-paste leftover.
550 INTREPID2_TEST_FOR_ABORT(
true,
551 ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_Cn_FEM_ORTH::OrthPolynomialTri) Computing of second and higher-order derivatives is not currently supported");
// Unconditional device-side assertion failure; presumably the fall-through for any
// other operator type.
554 device_assert(
false);
// Accumulates the scratch size needed for the five work arrays that operator()
// constructs: two OutputScratchView arrays of length polyOrder_+1 and three
// OutputScratchView2D arrays of numAlphaValues x (polyOrder_+1).
// team_size is not referenced in the visible lines.
// NOTE(review): the end of this function is not visible in this excerpt.
561 size_t team_shmem_size (
int team_size)
const
// Same row count as in operator(): max(p-1, 1).
568 const int numAlphaValues = std::max(polyOrder_-1, 1);
569 size_t shmem_size = 0;
// With a positive fad_size_output_, it is passed as an extra trailing extent.
570 if (fad_size_output_ > 0)
573 shmem_size += 2 * OutputScratchView::shmem_size(polyOrder_ + 1, fad_size_output_);
575 shmem_size += 3 * OutputScratchView2D::shmem_size(numAlphaValues, polyOrder_ + 1, fad_size_output_);
// Same two contributions without the trailing fad_size_output_ extent.
580 shmem_size += 2 * OutputScratchView::shmem_size(polyOrder_ + 1);
582 shmem_size += 3 * OutputScratchView2D::shmem_size(numAlphaValues, polyOrder_ + 1);