16#ifndef dealii_matrix_free_evaluation_kernels_face_h
17#define dealii_matrix_free_evaluation_kernels_face_h
// Fragment: start of a template parameter list (first parameter is the
// symmetric_evaluate flag) followed by pieces of a helper that receives a
// subface index and a tensor direction. The helper's name and return value
// are not visible in this excerpt.
39 template <
bool symmetric_evaluate,
65 const unsigned int subface_index,
66 const unsigned int direction)
// Branch on the symmetric_evaluate template flag; the branch body is not
// visible here.
68 if (symmetric_evaluate)
// Reduce the combined subface number to a per-direction index: the remainder
// modulo 2 for direction 0, the quotient by 2 for any other direction.
82 const unsigned int index =
83 direction == 0 ? subface_index % 2 : subface_index / 2;
// Fragment of the face evaluation kernel (dof-side data -> quadrature-point
// data). Three passes over the components are visible: one writing
// values_quad, one writing values_quad and gradients_quad, one writing
// hessians_quad. Per component the dof-side pointer advances by 3 * n_dofs,
// i.e. each component owns three slabs of n_dofs entries
// (presumably value / first normal derivative / second normal derivative --
// TODO confirm against the caller).
// NOTE(review): the conditions selecting between the individual kernel calls
// are not visible in this excerpt.
94 const unsigned int n_components,
99 Number *gradients_quad,
100 Number *hessians_quad,
101 Number *scratch_data,
102 const unsigned int subface_index)
107 const std::size_t n_dofs = fe_degree > -1 ?
110 const std::size_t n_q_points =
// Remember the start of the dof data so the Hessian pass can rewind to it.
115 Number *values_dofs_ptr = values_dofs;
// Pass 1: values only.
119 for (
unsigned int c = 0; c < n_components; ++c)
124 eval0.template values<0, true, false>(values_dofs,
126 eval1.template values<1, true, false>(values_quad,
130 eval0.template values<0, true, false>(values_dofs,
// Degenerate case: a single entry is copied straight through.
134 values_quad[0] = values_dofs[0];
141 values_dofs += 3 * n_dofs;
142 values_quad += n_q_points;
// Pass 2: values and gradients.
145 for (
unsigned int c = 0; c < n_components; ++c)
// The symmetric_evaluate flag selects a variant that uses eval0 for both
// directions and a separate eval_grad object for the in-face derivatives.
150 if (symmetric_evaluate &&
153 eval0.template values<0, true, false>(values_dofs,
155 eval0.template values<1, true, false>(values_quad,
164 eval_grad.template gradients<0, true, false, 3>(
165 values_quad, gradients_quad);
166 eval_grad.template gradients<1, true, false, 3>(
167 values_quad, gradients_quad + 1);
172 eval0.template gradients<0, true, false>(values_dofs,
174 eval1.template values<1, true, false, 3>(scratch_data,
178 eval0.template values<0, true, false>(values_dofs,
180 eval1.template gradients<1, true, false, 3>(
181 scratch_data, gradients_quad + 1);
184 eval1.template values<1, true, false>(scratch_data,
// The second dof slab (values_dofs + n_dofs) feeds the remaining gradient
// component.
188 eval0.template values<0, true, false>(values_dofs + n_dofs,
190 eval1.template values<1, true, false, 3>(scratch_data,
195 eval0.template values<0, true, false, 2>(values_dofs + n_dofs,
197 eval0.template gradients<0, true, false, 2>(values_dofs,
200 eval0.template values<0, true, false>(values_dofs,
// Degenerate case: slab 0 is the value, slab 1 the single gradient entry.
204 values_quad[0] = values_dofs[0];
205 gradients_quad[0] = values_dofs[1];
210 values_dofs += 3 * n_dofs;
211 values_quad += n_q_points;
// Gradients occupy dim entries per quadrature point and component.
212 gradients_quad += dim * n_q_points;
// Pass 3: Hessians. Rewind the dof pointer advanced by the earlier passes.
217 values_dofs = values_dofs_ptr;
218 for (
unsigned int c = 0; c < n_components; ++c)
224 eval0.template hessians<0, true, false>(values_dofs,
226 eval1.template values<1, true, false>(scratch_data,
230 eval0.template values<0, true, false>(values_dofs,
232 eval1.template hessians<1, true, false>(scratch_data,
237 eval0.template values<0, true, false>(values_dofs +
240 eval1.template values<1, true, false>(scratch_data,
245 eval0.template gradients<0, true, false>(values_dofs,
247 eval1.template gradients<1, true, false>(scratch_data,
252 eval0.template gradients<0, true, false>(values_dofs +
255 eval1.template values<1, true, false>(scratch_data,
260 eval0.template values<0, true, false>(values_dofs + n_dofs,
262 eval1.template gradients<1, true, false>(scratch_data,
269 eval0.template hessians<0, true, false>(values_dofs,
// The third dof slab (values_dofs + 2 * n_dofs) and the second slab feed the
// second and third Hessian blocks, each n_q_points long.
272 eval0.template values<0, true, false>(
273 values_dofs + 2 * n_dofs, hessians_quad + n_q_points);
275 eval0.template gradients<0, true, false>(
276 values_dofs + n_dofs, hessians_quad + 2 * n_q_points);
// Degenerate case: slab 2 is the single Hessian entry.
279 hessians_quad[0] = values_dofs[2];
284 values_dofs += 3 * n_dofs;
// dim * (dim + 1) / 2 Hessian entries are stored per quadrature point.
285 hessians_quad += dim * (dim + 1) / 2 * n_q_points;
// Fragment of the face integration kernel (quadrature-point data -> dof-side
// data). It mirrors the evaluation kernel: the same three passes (values,
// values + gradients, Hessians) appear with source and destination swapped.
// Calls whose third template argument is `true` come paired with an otherwise
// identical call using `false`
// (presumably "add into destination" vs. "overwrite" -- TODO confirm in the
// tensor-product kernel documentation).
// NOTE(review): the conditions selecting between the paired calls are not
// visible in this excerpt.
292 const unsigned int n_components,
297 Number *gradients_quad,
298 Number *hessians_quad,
299 Number *scratch_data,
300 const unsigned int subface_index)
305 const std::size_t n_dofs =
309 const std::size_t n_q_points =
// Remember the start of the dof data so the Hessian pass can rewind to it.
314 Number *values_dofs_ptr = values_dofs;
// Pass 1: values only.
318 for (
unsigned int c = 0; c < n_components; ++c)
323 eval1.template values<1, false, false>(values_quad,
325 eval0.template values<0, false, false>(values_quad,
329 eval0.template values<0, false, false>(values_quad,
// Degenerate case: a single entry is copied straight through.
333 values_dofs[0] = values_quad[0];
338 values_dofs += 3 * n_dofs;
339 values_quad += n_q_points;
// Pass 2: values and gradients.
342 for (
unsigned int c = 0; c < n_components; ++c)
// The gradient entry at offset 2 is written to the second dof slab.
348 eval1.template values<1, false, false, 3>(gradients_quad + 2,
350 eval0.template values<0, false, false>(scratch_data,
351 values_dofs + n_dofs);
// The symmetric_evaluate flag selects a variant that folds the in-face
// gradient entries into values_quad through eval_grad before a single
// values sweep.
352 if (symmetric_evaluate &&
363 eval_grad.template gradients<1, false, true, 3>(
364 gradients_quad + 1, values_quad);
366 eval_grad.template gradients<1, false, false, 3>(
367 gradients_quad + 1, values_quad);
368 eval_grad.template gradients<0, false, true, 3>(
369 gradients_quad, values_quad);
370 eval0.template values<1, false, false>(values_quad,
372 eval0.template values<0, false, false>(values_quad,
379 eval1.template values<1, false, false>(values_quad,
381 eval1.template gradients<1, false, true, 3>(
382 gradients_quad + 1, scratch_data);
385 eval1.template gradients<1, false, false, 3>(
386 gradients_quad + 1, scratch_data);
389 eval0.template values<0, false, false>(scratch_data,
393 eval1.template values<1, false, false, 3>(gradients_quad,
395 eval0.template gradients<0, false, true>(scratch_data,
400 eval0.template values<0, false, false, 2>(gradients_quad + 1,
403 eval0.template gradients<0, false, false, 2>(gradients_quad,
406 eval0.template values<0, false, true>(values_quad,
// Degenerate case: value goes to slab 0, the single gradient entry to slab 1.
410 values_dofs[0] = values_quad[0];
411 values_dofs[1] = gradients_quad[0];
416 values_dofs += 3 * n_dofs;
417 values_quad += n_q_points;
// Gradients occupy dim entries per quadrature point and component.
418 gradients_quad += dim * n_q_points;
// Pass 3: Hessians. Rewind the dof pointer advanced by the earlier passes.
423 values_dofs = values_dofs_ptr;
424 for (
unsigned int c = 0; c < n_components; ++c)
430 eval1.template values<1, false, false>(hessians_quad,
434 eval0.template hessians<0, false, true>(scratch_data,
437 eval0.template hessians<0, false, false>(scratch_data,
441 eval1.template hessians<1, false, false>(hessians_quad +
444 eval0.template values<0, false, true>(scratch_data,
448 eval1.template values<1, false, false>(hessians_quad +
451 eval0.template values<0, false, false>(scratch_data,
456 eval1.template gradients<1, false, false>(hessians_quad +
459 eval0.template gradients<0, false, true>(scratch_data,
463 eval1.template values<1, false, false>(hessians_quad +
467 eval0.template gradients<0, false, true>(scratch_data,
471 eval0.template gradients<0, false, false>(scratch_data,
476 eval1.template gradients<1, false, false>(hessians_quad +
479 eval0.template values<0, false, true>(scratch_data,
480 values_dofs + n_dofs);
487 eval0.template hessians<0, false, true>(hessians_quad,
490 eval0.template hessians<0, false, false>(hessians_quad,
// The second Hessian block goes to the third dof slab, the third block to
// the second slab.
494 eval0.template values<0, false, false>(
495 hessians_quad + n_q_points, values_dofs + 2 * n_dofs);
498 eval0.template gradients<0, false, true>(
499 hessians_quad + 2 * n_q_points, values_dofs + n_dofs);
501 eval0.template gradients<0, false, false>(
502 hessians_quad + 2 * n_q_points, values_dofs + n_dofs);
// Degenerate case: the single Hessian entry goes to slab 2.
505 values_dofs[2] = hessians_quad[0];
514 values_dofs += 3 * n_dofs;
// dim * (dim + 1) / 2 Hessian entries are stored per quadrature point.
515 hessians_quad += dim * (dim + 1) / 2 * n_q_points;
// Fragment of a face kernel for an anisotropic vector element: each component
// has degree + 1 dofs in one ("normal") direction and degree dofs in the other
// ("tangential") directions, as encoded in dofs_per_direction below
// (presumably the Raviart-Thomas path -- TODO confirm; the class name is not
// visible in this excerpt). The do_integrate template flag selects between the
// evaluation calls (second template argument `true`) and the integration
// calls (second template argument `false`) seen further down.
523 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
533 template <
bool do_
integrate>
539 Number *values_dofs_in,
542 Number *scratch_data,
543 const unsigned int subface_index,
544 const unsigned int face_direction)
// Use the compile-time degree when given, otherwise the run-time one stored
// with the first shape data object.
548 const int degree = fe_degree != -1 ? fe_degree : shape_data[0].fe_degree;
549 const int n_rows_n = degree + 1;
550 const int n_rows_t = degree;
// Row = vector component, column = coordinate direction; the diagonal holds
// the larger (normal) count.
551 const ::ndarray<int, 3, 3> dofs_per_direction{
552 {{{n_rows_n, n_rows_t, n_rows_t}},
553 {{n_rows_t, n_rows_n, n_rows_t}},
554 {{n_rows_t, n_rows_t, n_rows_n}}}};
565 (fe_degree > 0 ? fe_degree : 0),
// Running start offset of each component inside values_dofs_in. Every
// component contributes three slabs of face dofs; the two assignments below
// are the variants with one and with two in-face directions (the selecting
// condition is not visible here).
570 std::array<int, dim> values_dofs_offsets = {};
571 for (
unsigned int comp = 0; comp < dim - 1; ++comp)
574 values_dofs_offsets[comp + 1] =
575 values_dofs_offsets[comp] +
576 3 * dofs_per_direction[comp][(face_direction + 1) % dim];
578 values_dofs_offsets[comp + 1] =
579 values_dofs_offsets[comp] +
580 3 * dofs_per_direction[comp][(face_direction + 1) % dim] *
581 dofs_per_direction[comp][(face_direction + 2) % dim];
// Visit the components in cyclic order starting with the one after
// face_direction, so that component face_direction itself comes last.
587 std::array<unsigned int, dim> components;
588 for (
unsigned int comp = 0; comp < dim; ++comp)
589 components[comp] = (face_direction + comp + 1) % dim;
591 for (
const unsigned int comp : components)
593 Number *values_dofs = values_dofs_in + values_dofs_offsets[comp];
// Dof counts of this component along the two in-face directions (the second
// one only exists for dim > 2).
595 std::array<int, 2> n_blocks{
596 {dofs_per_direction[comp][(face_direction + 1) % dim],
597 (dim > 2 ? dofs_per_direction[comp][(face_direction + 2) % dim] :
600 if constexpr (dim == 3)
609 shape_data[0].shape_gradients_collocation_eo.data(),
// The in-face direction holding the larger dof count is handled with
// shape_data[0] through normal<>, the other one with shape_data[1] through
// tangential<>.
619 if (n_blocks[0] == n_rows_n)
621 eval.template normal<0>(shape_data[0],
624 eval.template tangential<1, 0>(shape_data[1],
630 eval.template normal<0>(shape_data[0],
632 n_blocks[0] * n_blocks[1],
634 eval.template tangential<1, 0, dim>(shape_data[1],
639 else if (n_blocks[1] == n_rows_n)
641 eval.template normal<1>(shape_data[0],
644 eval.template tangential<0, 1>(shape_data[1],
650 eval.template normal<1>(shape_data[0],
652 n_blocks[0] * n_blocks[1],
654 eval.template tangential<0, 1, dim>(shape_data[1],
// Neither in-face direction has the larger count: both use shape_data[1].
661 Eval eval(shape_data[1].shape_values_eo.data(), {}, {});
662 eval.template values<0, true, false>(values_dofs, values);
663 eval.template values<1, true, false>(values, values);
666 eval.template values<0, true, false>(values_dofs +
670 eval.template values<1, true, false, dim>(
671 scratch_data, gradients + 2);
676 eval_g.template gradients<0, true, false, dim>(values,
678 eval_g.template gradients<1, true, false, dim>(values,
// From here on the calls use `false` as second template argument, i.e. the
// integration counterpart of the block above.
694 eval_g.template gradients<0, false, true, dim>(
697 eval_g.template gradients<0, false, false, dim>(
699 eval_g.template gradients<1, false, true, dim>(gradients +
703 if (n_blocks[0] == n_rows_n)
705 eval.template tangential<1, 0>(shape_data[1],
708 eval.template normal<0>(shape_data[0],
714 eval.template tangential<1, 0, dim>(shape_data[1],
717 eval.template normal<0>(shape_data[0],
720 n_blocks[0] * n_blocks[1]);
723 else if (n_blocks[1] == n_rows_n)
725 eval.template tangential<0, 1>(shape_data[1],
728 eval.template normal<1>(shape_data[0],
734 eval.template tangential<0, 1, dim>(shape_data[1],
737 eval.template normal<1>(shape_data[0],
740 n_blocks[0] * n_blocks[1]);
745 Eval eval_iso(shape_data[1].shape_values_eo.data(),
748 eval_iso.template values<1, false, false>(values, values);
749 eval_iso.template values<0, false, false>(values,
753 eval_iso.template values<1, false, false, dim>(
754 gradients + 2, scratch_data);
755 eval_iso.template values<0, false, false>(
757 values_dofs + n_blocks[0] * n_blocks[1]);
// Branch with a single in-face direction (presumably the dim == 2 case --
// TODO confirm; the enclosing condition is not visible): choose the evaluator
// type by whether that direction has the larger dof count.
773 if (n_blocks[0] == n_rows_n)
775 EvalN eval(shape_data[0].shape_values_eo,
776 shape_data[0].shape_gradients_eo,
778 eval.template values<0, true, false>(values_dofs, values);
781 eval.template gradients<0, true, false, dim>(
782 values_dofs, gradients);
783 eval.template values<0, true, false, dim>(
784 values_dofs + n_rows_n, gradients + 1);
789 Eval eval(shape_data[1].shape_values_eo,
790 shape_data[1].shape_gradients_eo,
792 eval.template values<0, true, false>(values_dofs, values);
795 eval.template gradients<0, true, false, dim>(
796 values_dofs, gradients);
797 eval.template values<0, true, false, dim>(
798 values_dofs + n_rows_t, gradients + 1);
// Integration counterpart of the single-direction branch above.
805 if (n_blocks[0] == n_rows_n)
807 EvalN eval(shape_data[0].shape_values_eo,
808 shape_data[0].shape_gradients_eo,
811 eval.template values<0, false, false>(values,
816 eval.template gradients<0, false, true, dim>(
817 gradients, values_dofs);
819 eval.template gradients<0, false, false, dim>(
820 gradients, values_dofs);
821 eval.template values<0, false, false, dim>(
822 gradients + 1, values_dofs + n_rows_n);
827 Eval eval(shape_data[1].shape_values_eo,
828 shape_data[1].shape_gradients_eo,
831 eval.template values<0, false, false>(values,
836 eval.template gradients<0, false, true, dim>(
837 gradients, values_dofs);
839 eval.template gradients<0, false, false, dim>(
840 gradients, values_dofs);
841 eval.template values<0, false, false, dim>(
842 gradients + 1, values_dofs + n_rows_t);
// Fragment of a class template <dim, fe_degree, Number> holding two member
// templates parameterised by do_evaluate / add_into_output. Both start with an
// Assert comparing the compile-time degree with the run-time data in
// shape_info, then forward to another routine (forwarding targets only partly
// visible).
855 template <
int dim,
int fe_degree,
typename Number>
861 template <
bool do_evaluate,
bool add_
into_output>
868 const unsigned int face_no)
// First variant: checks fe_degree against the run-time degree and works with
// the per-face shape data (shape_data_on_face).
870 Assert(
static_cast<unsigned int>(fe_degree) ==
871 shape_info.
data.front().fe_degree ||
876 n_components, input, output, flags, face_no, shape_info);
879 const unsigned int fe_degree_ = shape_info.
data.front().fe_degree;
888 shape_info.
data.front().shape_data_on_face,
897 template <
bool do_evaluate,
bool add_
into_output>
900 const unsigned int n_components,
905 const unsigned int face_no)
// Second variant: checks fe_degree + 1 against the number of 1d quadrature
// points and works with the quadrature-based face data
// (quadrature_data_on_face).
907 Assert(
static_cast<unsigned int>(fe_degree + 1) ==
908 shape_info.
data.front().n_q_points_1d ||
918 shape_info.
data.front().quadrature.size(),
919 shape_info.
data.front().quadrature_data_on_face,
// Fragment of the generic cell <-> face interpolation helper. face_direction
// is a template parameter starting at 0; when it does not match the run-time
// face (face_no / 2) the last visible branch re-dispatches with
// std::min(face_direction + 1, dim - 1)
// (presumably a recursive call to this same template -- TODO confirm).
925 template <
bool do_evaluate,
bool add_
into_output,
int face_direction = 0>
931 const unsigned int face_no,
932 const unsigned int n_points_1d,
934 const unsigned int dofs_per_component_on_cell,
935 const unsigned int dofs_per_component_on_face)
// face_no / 2 is the coordinate direction normal to the face; face_no % 2
// (used below) selects which of the two faces in that direction.
937 if (face_direction == face_no / 2)
939 constexpr int stride_ =
Utilities::pow(fe_degree + 1, face_direction);
// Fall back to the run-time point count when the degree is not a compile-time
// constant (fe_degree == -1).
941 const int n_rows = fe_degree != -1 ? fe_degree + 1 : n_points_1d;
943 const std::array<int, 2> n_blocks{
944 {(dim > 1 ? n_rows : 1), (dim > 2 ? n_rows : 1)}};
// Pointer increments used while walking the cell data; they depend on which
// direction is normal to the face. Some assignments are missing from this
// excerpt.
945 std::array<int, 2> steps;
946 if constexpr (face_direction == 0)
947 steps = {{n_rows, 0}};
948 else if constexpr (face_direction == 1 && dim == 2)
950 else if constexpr (face_direction == 1)
952 steps = {{n_rows * n_rows, -n_rows * n_rows * n_rows + 1}};
953 else if constexpr (face_direction == 2)
956 for (
unsigned int c = 0; c < n_components; ++c)
// Three kernel instantiations with last template argument 2, 1 and 0
// (presumably the highest derivative order to interpolate -- TODO confirm).
963 2>(shape_data[face_no % 2].begin(),
975 1>(shape_data[face_no % 2].begin(),
987 0>(shape_data[face_no % 2].begin(),
// Per-component pointer advance: cell -> face in one branch, face -> cell in
// the other (the selecting condition is not visible).
996 input += dofs_per_component_on_cell;
997 output += dofs_per_component_on_face;
1001 output += dofs_per_component_on_cell;
1002 input += dofs_per_component_on_face;
1006 else if (face_direction < dim)
1010 std::min(face_direction + 1, dim - 1)>(
1018 dofs_per_component_on_cell,
1019 dofs_per_component_on_face);
// Fragment of the cell <-> face interpolation helper for the anisotropic
// vector element (degree + 1 dofs in a component's own direction, degree dofs
// in the others; presumably Raviart-Thomas -- TODO confirm, the function name
// is not visible). The components are handled one after another, each with
// its own steps / n_blocks / stride. Two template parameters drive the
// dispatch: face_direction must match face_no / 2, and max_derivative may
// have to be raised first (flag increase_max_der).
1023 template <
bool do_evaluate,
1024 bool add_into_output,
1025 int face_direction = 0,
1026 int max_derivative = 0>
1029 const unsigned int n_components,
1030 const Number *input,
1033 const unsigned int face_no,
// Set to true by a condition that is not visible in this excerpt.
1045 bool increase_max_der =
false;
1048 increase_max_der =
true;
1050 if (face_direction == face_no / 2 && !increase_max_der)
1052 constexpr int stride1 =
Utilities::pow(fe_degree + 1, face_direction);
1053 constexpr int stride0 =
Utilities::pow(fe_degree, face_direction);
1054 constexpr int stride2 = fe_degree * (fe_degree + 1);
1057 fe_degree != -1 ? fe_degree : shape_info.
data[0].fe_degree;
1058 const int n_rows_n = degree + 1;
1059 const int n_rows_t = degree;
1061 std::array<int, 3> strides{{1, 1, 1}};
1062 if (face_direction > 0)
// NOTE(review): face_direction == 3 cannot hold for dim <= 3, so this factor
// always evaluates to 1 -- confirm whether a different comparison was meant.
1066 strides[1] = n_rows_t * (face_direction == 3 ? n_rows_n : 1);
// Row = vector component, column = coordinate direction.
1069 const ::ndarray<int, 3, 3> dofs_per_direction{
1070 {{{n_rows_n, n_rows_t, n_rows_t}},
1071 {{n_rows_t, n_rows_n, n_rows_t}},
1072 {{n_rows_t, n_rows_t, n_rows_n}}}};
1074 std::array<int, 2> steps, n_blocks;
// --- Component 0 (row 0 of dofs_per_direction) ---
1076 if constexpr (face_direction == 0)
1077 steps = {{degree + (face_direction == 0), 0}};
1078 else if constexpr (face_direction == 1 && dim == 2)
1080 else if constexpr (face_direction == 1)
1083 {n_rows_n * n_rows_t, -n_rows_n * n_rows_t * n_rows_t + 1}};
1084 else if constexpr (face_direction == 2)
1087 n_blocks[0] = dofs_per_direction[0][(face_direction + 1) % dim];
1089 dim > 2 ? dofs_per_direction[0][(face_direction + 2) % dim] : 1;
1092 (fe_degree != -1 ? (fe_degree + (face_direction == 0)) : 0),
1093 ((face_direction < 2) ? stride1 : stride2),
// shape_info.data[0] is used when this component's direction equals
// face_direction, data[1] otherwise.
1096 max_derivative>(shape_info.
data[face_direction != 0]
1097 .shape_data_on_face[face_no % 2]
1103 degree + (face_direction == 0),
// Three slabs of n_blocks[0] * n_blocks[1] face entries per component.
1109 output += 3 * n_blocks[0] * n_blocks[1];
1114 input += 3 * n_blocks[0] * n_blocks[1];
// --- Component 1 (row 1 of dofs_per_direction) ---
1118 if constexpr (face_direction == 0)
1119 steps = {{degree, 0}};
1121 n_blocks[0] = dofs_per_direction[1][(face_direction + 1) % dim];
1123 dim > 2 ? dofs_per_direction[1][(face_direction + 2) % dim] : 1;
1126 (fe_degree != -1 ? (fe_degree + (face_direction == 1)) : 0),
1127 ((face_direction < 2) ? stride0 : stride2),
1130 max_derivative>(shape_info.
data[face_direction != 1]
1131 .shape_data_on_face[face_no % 2]
1137 degree + (face_direction == 1),
// --- Component 2, only present in 3d ---
1140 if constexpr (dim > 2)
1145 output += 3 * n_blocks[0] * n_blocks[1];
1150 input += 3 * n_blocks[0] * n_blocks[1];
1153 if constexpr (face_direction == 0)
1154 steps = {{degree, 0}};
1155 else if constexpr (face_direction == 1)
1158 {n_rows_t * n_rows_t, -n_rows_n * n_rows_t * n_rows_t + 1}};
1159 else if constexpr (face_direction == 2)
1162 n_blocks[0] = dofs_per_direction[2][(face_direction + 1) % dim];
1163 n_blocks[1] = dofs_per_direction[2][(face_direction + 2) % dim];
1166 (fe_degree != -1 ? (fe_degree + (face_direction == 2)) : 0),
1170 max_derivative>(shape_info.
data[face_direction != 2]
1171 .shape_data_on_face[face_no % 2]
1177 degree + (face_direction == 2),
// Direction matches but a higher max_derivative is needed: re-dispatch
// (template arguments of the call are not visible here).
1181 else if (face_direction == face_no / 2)
1188 n_components, input, output, flag, face_no, shape_info);
// Direction does not match yet: re-dispatch with the next face_direction,
// clamped to dim - 1, in two variants depending on increase_max_der.
1190 else if (face_direction < dim)
1192 if (increase_max_der)
1196 std::min(face_direction + 1, dim - 1),
1198 n_components, input, output, flag, face_no, shape_info);
1204 std::min(face_direction + 1, dim - 1),
1206 n_components, input, output, flag, face_no, shape_info);
// Generic contiguous read: copies VectorizedArrayType::size() consecutive
// entries from src_ptr into the lanes of dst, one lane at a time.
1215 template <
typename VectorizedArrayType,
typename Number2>
1219 for (
unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
1220 dst[v] = src_ptr[v];
// Template head of a second overload parameterised on <Number, width>; its
// signature and body are not visible in this excerpt.
1227 template <
typename Number, std::
size_t w
idth>
// Generic indexed read: lane v of dst receives src_ptr[indices[v]].
1237 template <
typename VectorizedArrayType,
typename Number2>
1240 const unsigned int *indices,
1241 VectorizedArrayType &dst)
1243 for (
unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
1244 dst[v] = src_ptr[indices[v]];
// Overload parameterised on <Number, width>: delegates to the gather() member
// of dst instead of looping over the lanes.
1251 template <
typename Number, std::
size_t w
idth>
1254 const unsigned int *indices,
1257 dst.
gather(src_ptr, indices);
// Generic contiguous accumulate: adds lane v of src onto dst_ptr[v].
1263 template <
typename VectorizedArrayType,
typename Number2>
1267 for (
unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
1268 dst_ptr[v] += src[v];
// Overload parameterised on <Number, width>: forms tmp + src and stores the
// sum to dst_ptr with a single store() call (tmp is filled on a line that is
// not visible here, presumably by loading from dst_ptr -- TODO confirm).
1275 template <
typename Number, std::
size_t w
idth>
1281 (tmp + src).store(dst_ptr);
// Generic indexed accumulate: adds lane v of src onto dst_ptr[indices[v]].
1287 template <
typename VectorizedArrayType,
typename Number2>
1290 const unsigned int *indices,
1293 for (
unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
1294 dst_ptr[indices[v]] += src[v];
// Overload parameterised on <Number, width>. For vector widths below 512 bits
// it keeps the plain per-lane loop; in the other preprocessor branch it
// gathers the current destination entries into tmp, adds src and scatters the
// sum back.
// NOTE(review): the gather/scatter path only gives the same result as the
// loop when the entries of indices are pairwise distinct -- confirm that
// callers guarantee this.
1301 template <
typename Number, std::
size_t w
idth>
1304 const unsigned int *indices,
1307#if DEAL_II_VECTORIZATION_WIDTH_IN_BITS < 512
1308 for (
unsigned int v = 0; v < width; ++v)
1309 dst_ptr[indices[v]] += src[v];
1312 tmp.
gather(dst_ptr, indices);
1313 (tmp + src).scatter(indices, dst_ptr);
// Fragment: reorders quadrature-point data of a face according to the index
// table `orientation`, separately for values, gradients and Hessians of every
// component. Each reorder goes through the temporary array tmp_values and
// comes in two directions (selected on lines not visible here, presumably by
// the `integrate` argument -- TODO confirm):
//   tmp[q] = data[orientation[q]]   (read through the table)
//   tmp[orientation[q]] = data[q]   (write through the table)
// followed by a copy of tmp back into the data array.
1319 template <
typename Number>
1322 const unsigned int n_components,
1324 const unsigned int *orientation,
1325 const bool integrate,
1326 const std::size_t n_q_points,
1328 Number *values_quad,
1329 Number *gradients_quad,
1330 Number *hessians_quad)
1332 for (
unsigned int c = 0; c < n_components; ++c)
// Values: one entry per point, component-major layout.
1337 for (
unsigned int q = 0; q < n_q_points; ++q)
1338 tmp_values[q] = values_quad[c * n_q_points + orientation[q]];
1340 for (
unsigned int q = 0; q < n_q_points; ++q)
1341 tmp_values[orientation[q]] = values_quad[c * n_q_points + q];
1342 for (
unsigned int q = 0; q < n_q_points; ++q)
1343 values_quad[c * n_q_points + q] = tmp_values[q];
// Gradients: the dim entries of one point are adjacent (index
// (c * n_q_points + q) * dim + d), so each d is reordered on its own.
1346 for (
unsigned int d = 0; d < dim; ++d)
1349 for (
unsigned int q = 0; q < n_q_points; ++q)
1351 gradients_quad[(c * n_q_points + orientation[q]) * dim + d];
1353 for (
unsigned int q = 0; q < n_q_points; ++q)
1354 tmp_values[orientation[q]] =
1355 gradients_quad[(c * n_q_points + q) * dim + d];
1356 for (
unsigned int q = 0; q < n_q_points; ++q)
1357 gradients_quad[(c * n_q_points + q) * dim + d] = tmp_values[q];
// Hessians: hdim = dim * (dim + 1) / 2 blocks of n_q_points entries per
// component (index (c * hdim + d) * n_q_points + q).
1361 const unsigned int hdim = (dim * (dim + 1)) / 2;
1362 for (
unsigned int d = 0; d < hdim; ++d)
1365 for (
unsigned int q = 0; q < n_q_points; ++q)
1366 tmp_values[q] = hessians_quad[(c * hdim + d) * n_q_points +
1369 for (
unsigned int q = 0; q < n_q_points; ++q)
1370 tmp_values[orientation[q]] =
1371 hessians_quad[(c * hdim + d) * n_q_points + q];
1372 for (
unsigned int q = 0; q < n_q_points; ++q)
1373 hessians_quad[(c * hdim + d) * n_q_points + q] =
// Fragment: same reordering as the whole-array variant, but applied to a
// single vectorization lane `v` of VectorizedArrayType data (every access
// carries a trailing [v]). dim is a run-time argument here, and the gradient
// and Hessian pointers default to nullptr.
// NOTE(review): the guards that skip the gradient / Hessian sections when the
// corresponding pointer is null are not visible in this excerpt.
1382 template <
typename Number,
typename VectorizedArrayType>
1385 const unsigned int dim,
1386 const unsigned int n_components,
1387 const unsigned int v,
1389 const unsigned int *orientation,
1390 const bool integrate,
1391 const std::size_t n_q_points,
1393 VectorizedArrayType *values_quad,
1394 VectorizedArrayType *gradients_quad =
nullptr,
1395 VectorizedArrayType *hessians_quad =
nullptr)
1397 for (
unsigned int c = 0; c < n_components; ++c)
// Values of lane v: read-through-table variant, write-through-table variant,
// then copy back from the temporary.
1402 for (
unsigned int q = 0; q < n_q_points; ++q)
1403 tmp_values[q] = values_quad[c * n_q_points + orientation[q]][v];
1405 for (
unsigned int q = 0; q < n_q_points; ++q)
1406 tmp_values[orientation[q]] = values_quad[c * n_q_points + q][v];
1407 for (
unsigned int q = 0; q < n_q_points; ++q)
1408 values_quad[c * n_q_points + q][v] = tmp_values[q];
// Gradients of lane v, one derivative index d at a time.
1411 for (
unsigned int d = 0; d < dim; ++d)
1415 for (
unsigned int q = 0; q < n_q_points; ++q)
1417 gradients_quad[(c * n_q_points + orientation[q]) * dim + d]
1420 for (
unsigned int q = 0; q < n_q_points; ++q)
1421 tmp_values[orientation[q]] =
1422 gradients_quad[(c * n_q_points + q) * dim + d][v];
1423 for (
unsigned int q = 0; q < n_q_points; ++q)
1424 gradients_quad[(c * n_q_points + q) * dim + d][v] =
// Hessians of lane v: hdim = dim * (dim + 1) / 2 blocks per component.
1430 const unsigned int hdim = (dim * (dim + 1)) / 2;
1431 for (
unsigned int d = 0; d < hdim; ++d)
1434 for (
unsigned int q = 0; q < n_q_points; ++q)
1435 tmp_values[q] = hessians_quad[(c * hdim + d) * n_q_points +
1438 for (
unsigned int q = 0; q < n_q_points; ++q)
1439 tmp_values[orientation[q]] =
1440 hessians_quad[(c * hdim + d) * n_q_points + q][v];
1441 for (
unsigned int q = 0; q < n_q_points; ++q)
1442 hessians_quad[(c * hdim + d) * n_q_points + q][v] =
// Fragment of the face evaluation path that applies the full face shape
// matrices directly instead of sum factorization
// (presumably used for elements without tensor-product structure -- TODO
// confirm). Components are handled in groups of up to three per pass (loop
// step c += 3); the branches on c + 1 / c + 2 == n_components pick the call
// for a trailing group of one or two components.
1451 template <
int dim,
typename Number>
1457 const Number *values_dofs,
1461 const auto &shape_data = shape_info.data.front();
1470 const unsigned int face_no = fe_eval.
get_face_no();
1472 const std::size_t n_dofs = shape_info.dofs_per_component_on_cell;
// The number of face quadrature points is looked up per face number.
1473 const std::size_t n_q_points = shape_info.n_q_points_faces[face_no];
// Values: start of the shape value table for this face and orientation.
1477 const auto *
const shape_values =
1478 &shape_data.shape_values_face(face_no, face_orientation, 0);
1481 auto *in = values_dofs;
1483 for (
unsigned int c = 0; c < n_components; c += 3)
1485 if (c + 1 == n_components)
1494 shape_values, in, out, n_dofs, n_q_points, 1, 1);
1495 else if (c + 2 == n_components)
1504 shape_values, in, out, n_dofs, n_q_points, 1, 1);
1514 shape_values, in, out, n_dofs, n_q_points, 1, 1);
// Advance by the three components handled in this pass.
1516 out += 3 * n_q_points;
// Gradients: same scheme with the gradient table and dim entries per point.
1524 const auto *in = values_dofs;
1526 const auto *
const shape_gradients =
1527 &shape_data.shape_gradients_face(face_no, face_orientation, 0);
1529 for (
unsigned int c = 0; c < n_components; c += 3)
1531 if (c + 1 == n_components)
1540 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
1541 else if (c + 2 == n_components)
1550 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
1560 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
1561 out += 3 * n_q_points * dim;
// Fragment: interpolates cell dof data to the face, either for all
// vectorization lanes at once or lane by lane. In the lane-by-lane branch
// (use_vectorization == false) each lane runs interpolate<true, false> and
// then copies 3 * dofs_per_face entries of that lane from scratch_data into
// temp.
1572 template <
int fe_degree>
1579 const Number *values_dofs,
1581 const bool use_vectorization,
1583 Number *scratch_data)
1587 if (use_vectorization ==
false)
1589 const auto &shape_data = shape_info.data.front();
1591 const unsigned int dofs_per_comp_face =
1595 const unsigned int dofs_per_face = n_components * dofs_per_comp_face;
1597 for (
unsigned int v = 0; v < Number::size(); ++v)
// The body of this first loop is not visible in this excerpt.
1604 for (
unsigned int i = 0; i < 3 * dofs_per_face; ++i)
1610 template interpolate<true, false>(n_components,
// Keep only lane v of the interpolated result.
1617 for (
unsigned int i = 0; i < 3 * dofs_per_face; ++i)
1618 temp[i][v] = scratch_data[i][v];
// Vectorized branch: a single interpolate call covers all lanes.
1623 template interpolate<true, false>(n_components,
// Fragment: dispatches the in-face evaluation to one of several kernel
// instantiations. One branch calls evaluate_or_integrate_in_face<false>; the
// other branches are only partly visible.
1632 template <
int fe_degree,
int n_q_po
ints_1d>
1641 Number *scratch_data)
1644 const auto &shape_data = shape_info.data.front();
// With a run-time degree (fe_degree == -1) the compile-time number of
// quadrature points passed on to the kernels is set to 0.
1647 constexpr unsigned int n_q_points_1d_actual =
1648 fe_degree > -1 ? n_q_points_1d : 0;
1654 n_q_points_1d_actual,
1656 template evaluate_or_integrate_in_face<false>(
1666 else if (fe_degree > -1 &&
1672 n_q_points_1d_actual,
1687 n_q_points_1d_actual,
// Fragment: applies the face-orientation reordering to quadrature-point data
// after evaluation, either lane by lane (use_vectorization == false) or once
// for all lanes. The orientation table comes from
// shape_info.face_orientations_quad and the point count from
// shape_info.n_q_points_face; the callees are not visible in this excerpt.
1705 const unsigned int n_components,
1708 const bool use_vectorization,
1713 if (use_vectorization ==
false)
1715 for (
unsigned int v = 0; v < Number::size(); ++v)
1729 &shape_info.face_orientations_quad(
1732 shape_info.n_q_points_face,
1746 shape_info.n_q_points_face,
// Fragment of the tensor-product face evaluation driver. It splits the
// scratch memory into temp1 (3 * n_components * dofs_per_comp_face entries)
// and temp2 (the remainder) and decides whether all lanes can be processed
// together (use_vectorization). The last two visible lines pass the same
// leading arguments to two routines whose names are not visible: the first
// takes (temp1, temp2), the second (use_vectorization, temp1).
1755 template <
int fe_degree,
int n_q_po
ints_1d>
1759 const Number *values_dofs_actual,
1763 const auto &shape_data = shape_info.data.front();
1765 const unsigned int dofs_per_comp_face =
1773 Number *temp2 = temp1 + 3 * n_components * dofs_per_comp_face;
// values_dofs is either the caller's array or a transformed copy; the
// selecting condition is not visible here.
1775 const Number *values_dofs =
1779 shape_info.n_q_points)) :
// NOTE(review): const is cast away from values_dofs for this call -- confirm
// that the callee only writes when values_dofs points at scratch memory.
1785 const_cast<Number *
>(values_dofs),
// Starts out true and is cleared inside the loop over lanes under a
// condition that is not visible in this excerpt.
1789 bool use_vectorization =
true;
1793 for (
unsigned int v = 0; v < Number::size(); ++v)
1796 use_vectorization =
false;
1807 n_components, evaluation_flag, fe_eval, temp1, temp2);
1811 n_components, evaluation_flag, fe_eval, use_vectorization, temp1);
1816 template <
int fe_degree,
int n_q_po
ints_1d>
1818 run(
const unsigned int n_components,
1820 const Number *values_dofs,
// Fragment of a selector struct <dim, Number> whose run<fe_degree>() forwards
// to project_to_face<fe_degree>. It splits the scratch memory into temp and
// scratch_data and determines use_vectorization before the call.
1840 template <
int dim,
typename Number>
1843 template <
int fe_degree>
1845 run(
const unsigned int n_components,
1847 const Number *values_dofs,
1851 const auto &shape_data = shape_info.data.front();
1853 const unsigned int dofs_per_comp_face =
// scratch_data starts right after the 3 * n_components * dofs_per_comp_face
// entries reserved for temp.
1861 Number *scratch_data = temp + 3 * n_components * dofs_per_comp_face;
// Starts out true and is cleared inside the loop over lanes under a
// condition that is not visible in this excerpt.
1863 bool use_vectorization =
true;
1867 for (
unsigned int v = 0; v < Number::size(); ++v)
1870 use_vectorization =
false;
1873 template project_to_face<fe_degree>(n_components,
// Fragment of a selector struct <dim, Number> whose
// run<fe_degree, n_q_points_1d>() forwards to
// evaluate_in_face<fe_degree, n_q_points_1d>, passing temp as the face dof
// data and scratch_data as work space.
1887 template <
int dim,
typename Number>
1890 template <
int fe_degree,
int n_q_po
ints_1d>
1892 run(
const unsigned int n_components,
1897 const auto &shape_data = shape_info.data.front();
1899 const unsigned int dofs_per_comp_face =
// scratch_data starts right after the 3 * n_components * dofs_per_comp_face
// entries reserved for temp.
1907 Number *scratch_data = temp + 3 * n_components * dofs_per_comp_face;
1910 template evaluate_in_face<fe_degree, n_q_points_1d>(
1911 n_components, evaluation_flag, fe_eval, temp, scratch_data);
// Fragment of the face integration path that applies the full face shape
// matrices directly instead of sum factorization (the counterpart of the
// matrix-based evaluation path). Quadrature-point data is tested against the
// face shape values and shape gradients and written to, or summed into,
// values_dofs.
//
// Components are handled in groups of up to three per pass, so both component
// loops must step by three: the branches on c + 1 / c + 2 == n_components pick
// the call for a trailing group of one or two components, and the input
// pointer is advanced by three components at the end of each pass.
1919 template <
int dim,
typename Number>
1924 const unsigned int n_components,
1926 Number *values_dofs,
1928 const bool sum_into_values)
1931 const auto &shape_data = shape_info.data.front();
1940 const unsigned int face_no = fe_eval.
get_face_no();
1942 const std::size_t n_dofs = shape_info.dofs_per_component_on_cell;
// The number of face quadrature points is looked up per face number.
1943 const std::size_t n_q_points = shape_info.n_q_points_faces[face_no];
// Values: start of the shape value table for this face and orientation.
1948 const auto *
const shape_values =
1949 &shape_data.shape_values_face(face_no, face_orientation, 0);
1952 auto *out = values_dofs;
1954 for (
unsigned int c = 0; c < n_components; c += 3)
// Two families of calls follow: one taken when sum_into_values is set, one
// otherwise.
1956 if (sum_into_values)
1958 if (c + 1 == n_components)
1967 shape_values, in, out, n_dofs, n_q_points, 1, 1);
1968 else if (c + 2 == n_components)
1977 shape_values, in, out, n_dofs, n_q_points, 1, 1);
1987 shape_values, in, out, n_dofs, n_q_points, 1, 1);
1991 if (c + 1 == n_components)
2000 shape_values, in, out, n_dofs, n_q_points, 1, 1);
2001 else if (c + 2 == n_components)
2010 shape_values, in, out, n_dofs, n_q_points, 1, 1);
2020 shape_values, in, out, n_dofs, n_q_points, 1, 1);
// Advance by the three components handled in this pass.
2022 in += 3 * n_q_points;
// Gradients: same scheme with the gradient table and dim entries per point.
2030 auto *out = values_dofs;
2032 const auto *
const shape_gradients =
2033 &shape_data.shape_gradients_face(face_no, face_orientation, 0);
// Step by three like the values loop above: each pass consumes up to three
// components and `in` moves on by 3 * n_q_points * dim. Stepping by one would
// revisit components and read beyond the data of the last one whenever
// n_components > 1.
2035 for (
unsigned int c = 0; c < n_components; c += 3)
// First family: taken when the result is not summed and further conditions
// (continued on a line not visible here) hold.
2037 if (!sum_into_values &&
2040 if (c + 1 == n_components)
2049 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
2050 else if (c + 2 == n_components)
2059 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
2069 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
2073 if (c + 1 == n_components)
2082 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
2083 else if (c + 2 == n_components)
2092 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
2102 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
2104 in += 3 * n_q_points * dim;
// Fragment: integration-side counterpart of the quadrature reordering for
// non-standard face orientations. It works lane by lane when
// use_vectorization is false and once for all lanes otherwise, passing
// shape_info.n_q_points_face as the point count. The callees are not visible
// in this excerpt.
2120 const unsigned int n_components,
2123 const bool use_vectorization,
2128 if (use_vectorization ==
false)
2130 for (
unsigned int v = 0; v < Number::size(); ++v)
2147 shape_info.n_q_points_face,
2162 shape_info.n_q_points_face,
// Fragment: dispatches the in-face integration to one of several kernel
// instantiations. One branch calls evaluate_or_integrate_in_face<true>; the
// other branches are only partly visible.
2169 template <
int fe_degree,
int n_q_po
ints_1d>
2178 Number *scratch_data)
2181 const auto &shape_data = shape_info.data.front();
// With a run-time degree (fe_degree == -1) the compile-time number of
// quadrature points passed on to the kernels is set to 0.
2183 const unsigned int n_q_points_1d_actual =
2184 fe_degree > -1 ? n_q_points_1d : 0;
2191 n_q_points_1d_actual,
2193 template evaluate_or_integrate_in_face<true>(
2203 else if (fe_degree > -1 &&
2211 n_q_points_1d_actual,
2226 n_q_points_1d_actual,
// Fragment: transfers integrated face data back to the cell dof array,
// either lane by lane (use_vectorization == false) or for all lanes at once.
// In both modes sum_into_values decides whether the result is added to
// values_dofs or overwrites it.
2238 template <
int fe_degree>
2245 Number *values_dofs,
2247 const bool use_vectorization,
2249 Number *scratch_data,
2250 const bool sum_into_values)
2253 const auto &shape_data = shape_info.data.front();
2255 const unsigned int dofs_per_comp_face =
2259 const unsigned int dofs_per_face = n_components * dofs_per_comp_face;
2261 if (use_vectorization ==
false)
2263 for (
unsigned int v = 0; v < Number::size(); ++v)
// Lane-by-lane branch: interpolate without adding, then merge lane v of
// scratch_data into values_dofs.
2272 template interpolate<false, false>(n_components,
// NOTE(review): both loops below touch 3 * dofs_per_face entries of
// values_dofs, a face-sized count applied to what looks like the cell dof
// array -- confirm the intended bound against the interpolate call above.
2279 if (sum_into_values)
2280 for (
unsigned int i = 0; i < 3 * dofs_per_face; ++i)
2281 values_dofs[i][v] += scratch_data[i][v];
2283 for (
unsigned int i = 0; i < 3 * dofs_per_face; ++i)
2284 values_dofs[i][v] = scratch_data[i][v];
// Vectorized branch: the add-or-overwrite choice is made through the second
// template argument of interpolate.
2289 if (sum_into_values)
2291 template interpolate<false, true>(n_components,
2299 template interpolate<false, false>(n_components,
// Fragment of the tensor-product face integration driver. It splits the
// scratch memory into temp1 (3 * n_components * dofs_per_comp_face entries)
// and temp2 (the remainder), determines use_vectorization and makes three
// calls whose callee names are not visible: one taking
// (use_vectorization, temp1), one taking (temp1, temp2), and a final one
// taking (values_dofs_actual, values_dofs).
2308 template <
int fe_degree,
int n_q_po
ints_1d>
2312 Number *values_dofs_actual,
2314 const bool sum_into_values)
2317 const auto &shape_data = shape_info.data.front();
2319 const unsigned int dofs_per_comp_face =
2325 Number *temp2 = temp1 + 3 * n_components * dofs_per_comp_face;
// values_dofs is either the caller's array or a work array; the selecting
// expression is not visible here.
2328 Number *values_dofs =
2335 bool use_vectorization =
true;
// Predicate over cell ids: true for an id equal to the first lane's cell id
// or equal to the invalid marker
// (presumably used to detect several lanes writing to the same cell, which
// rules out the vectorized path -- TODO confirm).
2344 [&](
const auto &v) {
2345 return v == fe_eval.get_cell_ids()[0] ||
2346 v == numbers::invalid_unsigned_int;
2351 n_components, integration_flag, fe_eval, use_vectorization, temp1);
2354 n_components, integration_flag, fe_eval, temp1, temp2);
2368 n_components, values_dofs_actual, values_dofs, fe_eval);
2373 template <
int fe_degree,
int n_q_po
ints_1d>
2375 run(
const unsigned int n_components,
2377 Number *values_dofs,
2379 const bool sum_into_values)
// Fragment of a selector struct <dim, Number> whose run<fe_degree>() forwards
// to collect_from_face<fe_degree>. It splits the scratch memory into temp and
// scratch_data and determines use_vectorization before the call.
2400 template <
int dim,
typename Number>
2403 template <
int fe_degree>
2405 run(
const unsigned int n_components,
2407 Number *values_dofs,
2409 const bool sum_into_values)
2412 const auto &shape_data = shape_info.data.front();
2414 const unsigned int dofs_per_comp_face =
// scratch_data starts right after the 3 * n_components * dofs_per_comp_face
// entries reserved for temp.
2420 Number *scratch_data = temp + 3 * n_components * dofs_per_comp_face;
2422 bool use_vectorization =
true;
// Predicate over cell ids: true for an id equal to the first lane's cell id
// or equal to the invalid marker (how the result updates use_vectorization is
// not visible in this excerpt).
2431 [&](
const auto &v) {
2432 return v == fe_eval.get_cell_ids()[0] ||
2433 v == numbers::invalid_unsigned_int;
2437 template collect_from_face<fe_degree>(n_components,
// Fragment of a selector struct <dim, Number> whose
// run<fe_degree, n_q_points_1d>() forwards to
// integrate_in_face<fe_degree, n_q_points_1d>, passing temp as the face dof
// data and scratch_data as work space.
2452 template <
int dim,
typename Number>
2455 template <
int fe_degree,
int n_q_po
ints_1d>
2457 run(
const unsigned int n_components,
2463 const auto &shape_data = shape_info.data.front();
2465 const unsigned int dofs_per_comp_face =
// scratch_data starts right after the 3 * n_components * dofs_per_comp_face
// entries reserved for temp.
2471 Number *scratch_data = temp + 3 * n_components * dofs_per_comp_face;
2474 template integrate_in_face<fe_degree, n_q_points_1d>(
2475 n_components, integration_flag, fe_eval, temp, scratch_data);
// Common driver that connects the face degrees of freedom of n_components
// components in the global vector (global_vector_ptr, or per-lane pointers
// derived from sm_ptr) with the buffer temp1. The per-entry operation is
// delegated to the callbacks of `proc` (hermite_grad*, value*), and the
// access pattern is selected from the index storage variant of the batch.
// NOTE(review): extracted listing -- the function name line, some parameters,
// all braces and a number of statements are missing between the numbered
// lines; comments describe only what the visible lines show.
2483 template <
int n_face_orientations,
2485 typename EvaluationData,
2486 const bool check_face_orientations =
false>
2490 const unsigned int n_components,
2492 typename Processor::Number2_ *global_vector_ptr,
2494 const EvaluationData &fe_eval,
2495 typename Processor::VectorizedArrayType_ *temp1)
// Compile-time settings and types supplied by the Processor type.
2497 constexpr int dim = Processor::dim_;
2498 constexpr int fe_degree = Processor::fe_degree_;
2499 using VectorizedArrayType =
typename Processor::VectorizedArrayType_;
2500 constexpr int n_lanes = VectorizedArrayType::size();
2502 using Number =
typename Processor::Number_;
2503 using Number2_ =
typename Processor::Number2_;
// Run-time data of the current batch, queried from fe_eval.
2505 const auto &shape_data = fe_eval.get_shape_info().data.front();
2506 constexpr bool integrate = Processor::do_integrate;
2507 const unsigned int face_no = fe_eval.get_face_no();
2508 const auto &dof_info = fe_eval.get_dof_info();
2509 const unsigned int cell = fe_eval.get_cell_or_face_batch_id();
2511 fe_eval.get_dof_access_index();
2513 dof_info.index_storage_variants[dof_access_index].size());
2514 constexpr unsigned int dofs_per_face =
2516 const unsigned int subface_index = fe_eval.get_subface_index();
2518 const unsigned int n_filled_lanes =
2519 dof_info.n_vectorization_lanes_filled[dof_access_index][cell];
// Starts out true only when every lane is filled. With one orientation slot
// per lane it is cleared as soon as a lane's face number or face orientation
// differs from lane 0.
2521 bool all_faces_are_same = n_filled_lanes == n_lanes;
2522 if (n_face_orientations == n_lanes)
2523 for (
unsigned int v = 1; v < n_lanes; ++v)
2524 if (fe_eval.get_face_no(v) != fe_eval.get_face_no(0) ||
2525 fe_eval.get_face_orientation(v) != fe_eval.get_face_orientation(0))
2527 all_faces_are_same =
false;
// One orientation-table pointer per slot; value-initialized (null) until a
// table is assigned below.
2532 std::array<const unsigned int *, n_face_orientations> orientation = {};
// 3d with per-lane slots, differing faces and is_interior_face() == 0: assign
// the table lane by lane, only for lanes whose face orientation is non-zero
// (and only when the shape data is nodal at cell boundaries).
2534 if (dim == 3 && n_face_orientations == n_lanes && !all_faces_are_same &&
2535 fe_eval.is_interior_face() == 0)
2536 for (
unsigned int v = 0; v < n_lanes; ++v)
2544 if (shape_data.nodal_at_cell_boundaries &&
2545 fe_eval.get_face_orientation(v) != 0)
2550 check_face_orientations ==
false)
2564 orientation[v] = &fe_eval.get_shape_info().face_orientations_dofs(
2565 fe_eval.get_face_orientation(v), 0);
// Otherwise, in 3d with a non-zero orientation reported for the batch, every
// slot receives the same table.
2568 else if (dim == 3 && fe_eval.get_face_orientation() != 0)
2572 check_face_orientations ==
false)
2586 for (
unsigned int v = 0; v < n_face_orientations; ++v)
2587 orientation[v] = &fe_eval.get_shape_info().face_orientations_dofs(
2588 fe_eval.get_face_orientation(), 0);
// Weight later handed to the hermite_grad* callbacks. It is read from
// shape_data_on_face[0] at an index that depends on `integrate` and on the
// parity of face_no.
2594 VectorizedArrayType grad_weight =
2596 .shape_data_on_face[0][fe_degree + (integrate ? (2 - face_no % 2) :
2597 (1 + face_no % 2))];
// Per-slot pointers into face_to_cell_index_hermite.
2600 std::array<const unsigned int *, n_face_orientations> index_array_hermite =
2604 if (n_face_orientations == 1)
2605 index_array_hermite[0] =
2606 &fe_eval.get_shape_info().face_to_cell_index_hermite(face_no, 0);
// Per-lane variant: each lane uses its own face number (a loop-local face_no)
// for both the weight lookup and the index table.
2609 for (
unsigned int v = 0; v < n_lanes; ++v)
2614 const auto face_no = fe_eval.get_face_no(v);
2618 .shape_data_on_face[0][fe_degree + (integrate ?
2619 (2 - (face_no % 2)) :
2620 (1 + (face_no % 2)))];
2622 index_array_hermite[v] =
2623 &fe_eval.get_shape_info().face_to_cell_index_hermite(face_no,
// Per-slot pointers into face_to_cell_index_nodal; the visible assignments
// are guarded by shape_data.nodal_at_cell_boundaries == true.
2630 std::array<const unsigned int *, n_face_orientations> index_array_nodal =
2632 if (shape_data.nodal_at_cell_boundaries ==
true)
2634 if (n_face_orientations == 1)
2635 index_array_nodal[0] =
2636 &fe_eval.get_shape_info().face_to_cell_index_nodal(face_no, 0);
2639 for (
unsigned int v = 0; v < n_lanes; ++v)
2644 const auto face_no = fe_eval.get_face_no(v);
2646 index_array_nodal[v] =
2647 &fe_eval.get_shape_info().face_to_cell_index_nodal(face_no,
// Maps index i for slot v to the index used into temp1. The first branch of
// the conditional is selected when check_face_orientations is false or no
// orientation table was assigned for v.
// NOTE(review): both result expressions are on lines not visible here.
2654 const auto reorientate = [&](
const unsigned int v,
const unsigned int i) {
2655 return (!check_face_orientations || orientation[v] ==
nullptr) ?
// dof_indices points at the dof_indices_contiguous entry of cell_index for
// the current access index.
2660 const unsigned int cell_index =
2662 fe_eval.get_cell_ids()[0] :
2664 const unsigned int *dof_indices =
2665 &dof_info.dof_indices_contiguous[dof_access_index][cell_index];
// One pass per component.
2667 for (
unsigned int comp = 0; comp < n_components; ++comp)
2669 const std::size_t index_offset =
2670 dof_info.component_dof_indices_offset
2671 [fe_eval.get_active_fe_index()]
2672 [fe_eval.get_first_selected_component()] +
// Storage variant interleaved_contiguous (single orientation slot): entry
// `ind` is addressed as vector_ptr + ind * n_lanes and handled by the
// vectorized callbacks.
2676 if (n_face_orientations == 1 &&
2677 dof_info.index_storage_variants[dof_access_index][cell] ==
2679 interleaved_contiguous)
2682 dof_info.n_vectorization_lanes_filled[dof_access_index][cell],
2684 Number2_ *vector_ptr =
2685 global_vector_ptr + dof_indices[0] + index_offset * n_lanes;
// Hermite-type access: two index entries (2*i and 2*i+1) per face dof, paired
// with temp1[i_] and temp1[i_ + dofs_per_face].
2689 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2693 const unsigned int ind1 = index_array_hermite[0][2 * i];
2694 const unsigned int ind2 = index_array_hermite[0][2 * i + 1];
2695 const unsigned int i_ = reorientate(0, i);
2696 proc.hermite_grad_vectorized(temp1[i_],
2697 temp1[i_ + dofs_per_face],
2698 vector_ptr + ind1 * n_lanes,
2699 vector_ptr + ind2 * n_lanes,
// Nodal access: one index entry per face dof.
2705 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2709 const unsigned int i_ = reorientate(0, i);
2710 const unsigned int ind = index_array_nodal[0][i];
2711 proc.value_vectorized(temp1[i_],
2712 vector_ptr + ind * n_lanes);
// Storage variant interleaved_contiguous_strided (single orientation slot):
// table entries are multiplied by n_lanes and the *_indexed callbacks are
// used.
2718 else if (n_face_orientations == 1 &&
2719 dof_info.index_storage_variants[dof_access_index][cell] ==
2721 interleaved_contiguous_strided)
2724 dof_info.n_vectorization_lanes_filled[dof_access_index][cell],
2726 Number2_ *vector_ptr = global_vector_ptr + index_offset * n_lanes;
2729 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2733 const unsigned int i_ = reorientate(0, i);
2734 const unsigned int ind1 =
2735 index_array_hermite[0][2 * i] * n_lanes;
2736 const unsigned int ind2 =
2737 index_array_hermite[0][2 * i + 1] * n_lanes;
2738 proc.hermite_grad_vectorized_indexed(
2740 temp1[i_ + dofs_per_face],
2750 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2754 const unsigned int i_ = reorientate(0, i);
2755 const unsigned int ind = index_array_nodal[0][i] * n_lanes;
2756 proc.value_vectorized_indexed(temp1[i_],
// Storage variant interleaved_contiguous_mixed_strides (single orientation
// slot): every lane v has its own start index and its own stride.
2764 else if (n_face_orientations == 1 &&
2765 dof_info.index_storage_variants[dof_access_index][cell] ==
2767 interleaved_contiguous_mixed_strides)
2769 const unsigned int *strides =
2770 &dof_info.dof_indices_interleave_strides[dof_access_index]
2772 unsigned int indices[n_lanes];
2773 for (
unsigned int v = 0; v < n_lanes; ++v)
2774 indices[v] = dof_indices[v] + index_offset * strides[v];
// Re-reads the same n_vectorization_lanes_filled entry as the outer
// n_filled_lanes declared near the top.
2775 const unsigned int n_filled_lanes =
2776 dof_info.n_vectorization_lanes_filled[dof_access_index][cell];
// All lanes filled: build per-lane index arrays and use the vectorized,
// indexed callback.
2780 if (n_filled_lanes == n_lanes)
2781 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2785 const unsigned int i_ = reorientate(0, i);
2786 unsigned int ind1[n_lanes];
2788 for (
unsigned int v = 0; v < n_lanes; ++v)
2789 ind1[v] = indices[v] +
2790 index_array_hermite[0][2 * i] * strides[v];
2791 unsigned int ind2[n_lanes];
2793 for (
unsigned int v = 0; v < n_lanes; ++v)
2797 index_array_hermite[0][2 * i + 1] * strides[v];
2798 proc.hermite_grad_vectorized_indexed(
2800 temp1[i_ + dofs_per_face],
// Presumably the branch for partially filled batches (the `else` is not
// visible): when not integrating, the first 2 * dofs_per_face entries of
// temp1 are reset before the filled lanes are handled one at a time.
2809 if (integrate ==
false)
2810 for (
unsigned int i = 0; i < 2 * dofs_per_face; ++i)
2811 temp1[i] = VectorizedArrayType();
2813 for (
unsigned int v = 0; v < n_filled_lanes; ++v)
2814 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2816 const unsigned int i_ =
2817 reorientate(n_face_orientations == 1 ? 0 : v, i);
2820 temp1[i_ + dofs_per_face][v],
2824 [n_face_orientations == 1 ? 0 : v][2 * i] *
2828 index_array_hermite[n_face_orientations == 1 ?
2832 grad_weight[n_face_orientations == 1 ? 0 : v]);
// Same two sub-cases for nodal access within the mixed-strides variant.
2838 if (n_filled_lanes == n_lanes)
2839 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2842 unsigned int ind[n_lanes];
2844 for (
unsigned int v = 0; v < n_lanes; ++v)
2846 indices[v] + index_array_nodal[0][i] * strides[v];
2847 const unsigned int i_ = reorientate(0, i);
2848 proc.value_vectorized_indexed(temp1[i_],
2854 if (integrate ==
false)
2855 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2856 temp1[i] = VectorizedArrayType();
2858 for (
unsigned int v = 0; v < n_filled_lanes; ++v)
2859 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2861 temp1[reorientate(n_face_orientations == 1 ? 0 : v,
2865 index_array_nodal[n_face_orientations == 1 ? 0 : v]
// Remaining case: more than one orientation slot, or a further storage
// variant whose name is on a line not visible here.
2873 else if (n_face_orientations > 1 ||
2874 dof_info.index_storage_variants[dof_access_index][cell] ==
2878 Number2_ *vector_ptr = global_vector_ptr + index_offset;
// Vectorized access is only used when all faces agree and no sm_ptr table is
// given.
2880 const bool vectorization_possible =
2881 all_faces_are_same && (sm_ptr ==
nullptr);
2883 std::array<Number2_ *, n_lanes> vector_ptrs{{
nullptr}};
2884 std::array<unsigned int, n_lanes> reordered_indices{
// Without vectorized access, set up one base pointer per lane: either
// vector_ptr + dof_indices[v], or -- when sm_ptr is given -- the data pointer
// selected through the pair stored in dof_indices_contiguous_sm, plus
// index_offset.
2887 if (vectorization_possible ==
false)
2889 if (n_face_orientations == 1)
2891 for (
unsigned int v = 0; v < n_filled_lanes; ++v)
2892 if (sm_ptr ==
nullptr)
2894 vector_ptrs[v] = vector_ptr + dof_indices[v];
2900 .dof_indices_contiguous_sm[dof_access_index]
2901 [cell * n_lanes + v];
2902 vector_ptrs[v] =
const_cast<Number2_ *
>(
2903 sm_ptr->operator[](temp.first).data() +
2904 temp.second + index_offset);
// Per-lane slots: same pointer setup, but looping over all n_lanes lanes and
// using the cell ids returned by fe_eval.get_cell_ids().
2907 else if (n_face_orientations == n_lanes)
2909 const auto &cells = fe_eval.get_cell_ids();
2910 for (
unsigned int v = 0; v < n_lanes; ++v)
2913 if (sm_ptr ==
nullptr)
2918 .dof_indices_contiguous[dof_access_index]
2925 .dof_indices_contiguous_sm[dof_access_index]
2927 vector_ptrs[v] =
const_cast<Number2_ *
>(
2928 sm_ptr->operator[](temp.first).data() +
2929 temp.second + index_offset);
// Collect each lane's dof_indices_contiguous entry (looked up by cell id)
// into reordered_indices and let dof_indices point at that array.
2938 else if (n_face_orientations == n_lanes)
2940 for (
unsigned int v = 0; v < n_lanes; ++v)
2941 reordered_indices[v] =
2942 dof_info.dof_indices_contiguous[dof_access_index]
2943 [fe_eval.get_cell_ids()[v]];
2944 dof_indices = reordered_indices.data();
// Hermite-type access, three sub-cases: vectorized indexed, single slot with
// per-lane pointers, and per-lane slots with per-lane index tables.
2949 if (vectorization_possible)
2950 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2952 const unsigned int ind1 = index_array_hermite[0][2 * i];
2953 const unsigned int ind2 =
2954 index_array_hermite[0][2 * i + 1];
2955 const unsigned int i_ = reorientate(0, i);
2957 proc.hermite_grad_vectorized_indexed(
2959 temp1[i_ + dofs_per_face],
2966 else if (n_face_orientations == 1)
2967 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2969 const unsigned int ind1 = index_array_hermite[0][2 * i];
2970 const unsigned int ind2 =
2971 index_array_hermite[0][2 * i + 1];
2972 const unsigned int i_ = reorientate(0, i);
2974 for (
unsigned int v = 0; v < n_filled_lanes; ++v)
2975 proc.hermite_grad(temp1[i_][v],
2976 temp1[i_ + dofs_per_face][v],
2977 vector_ptrs[v][ind1],
2978 vector_ptrs[v][ind2],
// When not integrating, the lanes [n_filled_lanes, n_lanes) are set to zero.
2981 if (integrate ==
false)
2982 for (
unsigned int v = n_filled_lanes; v < n_lanes; ++v)
2985 temp1[i + dofs_per_face][v] = 0.0;
// Per-lane slots: when not integrating and the batch is only partially
// filled, both temp1 parts are reset before the per-lane loop.
2990 if (integrate ==
false && n_filled_lanes < n_lanes)
2991 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2992 temp1[i] = temp1[i + dofs_per_face] = Number();
2994 for (
unsigned int v = 0; v < n_filled_lanes; ++v)
2995 for (
unsigned int i = 0; i < dofs_per_face; ++i)
2997 temp1[reorientate(v, i)][v],
2998 temp1[reorientate(v, i) + dofs_per_face][v],
2999 vector_ptrs[v][index_array_hermite[v][2 * i]],
3000 vector_ptrs[v][index_array_hermite[v][2 * i + 1]],
// Nodal access, same three sub-cases.
3006 if (vectorization_possible)
3007 for (
unsigned int i = 0; i < dofs_per_face; ++i)
3009 const unsigned int ind = index_array_nodal[0][i];
3010 const unsigned int i_ = reorientate(0, i);
3012 proc.value_vectorized_indexed(temp1[i_],
3018 if constexpr (n_face_orientations == 1)
3019 for (
unsigned int i = 0; i < dofs_per_face; ++i)
3021 const unsigned int ind = index_array_nodal[0][i];
3022 const unsigned int i_ = reorientate(0, i);
3024 for (
unsigned int v = 0; v < n_filled_lanes; ++v)
3025 proc.value(temp1[i_][v], vector_ptrs[v][ind]);
3027 if constexpr (integrate ==
false)
3028 for (
unsigned int v = n_filled_lanes; v < n_lanes;
3034 if (integrate ==
false && n_filled_lanes < n_lanes)
3035 for (
unsigned int i = 0; i < dofs_per_face; ++i)
3036 temp1[i] = Number();
3038 for (
unsigned int v = 0; v < n_filled_lanes; ++v)
3039 for (
unsigned int i = 0; i < dofs_per_face; ++i)
3040 proc.value(temp1[reorientate(v, i)][v],
3041 vector_ptrs[v][index_array_nodal[v][i]]);
// Move temp1 forward by 3 * dofs_per_face entries (presumably once per
// component iteration -- the closing braces are not visible).
3052 temp1 += 3 * dofs_per_face;
// NOTE(review): extracted listing -- the struct header, the remaining
// parameters of run(), the braces and many statements are not visible here.
// Outer template parameters: dimension, the scalar type of the source vector
// (Number2) and the SIMD array type.
3058 template <
int dim,
typename Number2,
typename VectorizedArrayType>
// Number is the element type of the SIMD array.
3061 using Number =
typename VectorizedArrayType::value_type;
3063 template <
int fe_degree,
int n_q_po
ints_1d>
3065 run(
const unsigned int n_components,
3067 const Number2 *src_ptr,
// Number of dofs per face for one component: (fe_degree + 1)^(dim - 1).
3076 const unsigned int dofs_per_face =
Utilities::pow(fe_degree + 1, dim - 1);
// scratch_data begins behind the first 3 * n_components * dofs_per_face
// entries of temp.
3079 VectorizedArrayType *scratch_data =
3080 temp + 3 * n_components * dofs_per_face;
// Two calls with identical visible arguments, reading via src_ptr / sm_ptr
// into temp. The callee name and the condition selecting between them are not
// visible -- presumably fe_face_evaluation_process_and_io with different
// template arguments; TODO confirm against the full file.
3088 p, n_components, evaluation_flag, src_ptr, sm_ptr, fe_eval, temp);
3091 p, n_components, evaluation_flag, src_ptr, sm_ptr, fe_eval, temp);
// Two evaluate_in_face calls follow; the template arguments that distinguish
// them are on lines not visible here.
3100 VectorizedArrayType>::
3101 evaluate_in_face(n_components,
3115 VectorizedArrayType>::
3116 evaluate_in_face(n_components,
// Loop over all lanes of the SIMD array; its body is not visible here.
3133 for (
unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
// Capability check on the shape info and the vector pointer.
// NOTE(review): extracted listing -- the function name line, other
// parameters, the compared enum values and the statements guarded by the
// condition are not visible; presumably the visible condition lists the
// unsupported configurations -- TODO confirm against the full file.
3176 template <
typename Number3>
3180 const Number2 *vector_ptr,
// Polynomial degree of the first shape-data entry.
3183 const unsigned int fe_degree = shape_info.
data.front().fe_degree;
// Visible terms of the ||-chain: degree below 1, shape data not nodal at cell
// boundaries, a mismatch of element_type with a value not shown, a null
// vector_ptr, and element_type above a value not shown.
3184 if (fe_degree < 1 || !shape_info.
data.front().nodal_at_cell_boundaries ||
3187 shape_info.
data.front().element_type !=
3190 vector_ptr ==
nullptr ||
3191 shape_info.
data.front().element_type >
// Evaluate-side callback set.
// NOTE(review): extracted listing -- the struct header, the function name
// lines, most parameters and the statements that fill temp_1 / temp_2 from
// memory are not visible. The callback names given below are inferred from
// the parameter lists and should be confirmed against the full file.
3201 template <
int fe_degree>
// Presumably hermite_grad_vectorized: after both temporaries are filled,
// temp_2 becomes grad_weight * (temp_1 - temp_2).
3211 template <
typename T0,
typename T1,
typename T2>
3217 const T2 &grad_weight)
3221 temp_2 = grad_weight * (temp_1 - temp_2);
// Callback with two template parameters; its signature and body are not
// visible here.
3224 template <
typename T1,
typename T2>
// Presumably hermite_grad_vectorized_indexed: same final formula as above,
// with additional index arguments indices_1 / indices_2.
3231 template <
typename T0,
typename T1,
typename T2,
typename T3>
3237 const T2 &grad_weight,
3238 const T3 &indices_1,
3239 const T3 &indices_2)
3243 temp_2 = grad_weight * (temp_1 - temp_2);
// Callback with three template parameters; its signature and body are not
// visible here.
3246 template <
typename T0,
typename T1,
typename T2>
// Presumably the scalar hermite_grad: takes the two source values by const
// reference and sets temp_2 = grad_weight * (temp_1 - src_ptr_2).
3253 template <
typename T0,
typename T1,
typename T2>
3257 const T1 &src_ptr_1,
3258 const T1 &src_ptr_2,
3259 const T2 &grad_weight)
3263 temp_2 = grad_weight * (temp_1 - src_ptr_2);
// Last callback of this set; only its template header is visible.
3266 template <
typename T1,
typename T2>
// NOTE(review): extracted listing -- the struct header, the remaining
// parameters of run(), the braces and many statements are not visible here.
// Outer template parameters: dimension, the scalar type of the destination
// vector (Number2) and the SIMD array type.
3278 template <
int dim,
typename Number2,
typename VectorizedArrayType>
// Number is the element type of the SIMD array.
3281 using Number =
typename VectorizedArrayType::value_type;
3283 template <
int fe_degree,
int n_q_po
ints_1d>
3285 run(
const unsigned int n_components,
// Number of dofs per face for one component: (fe_degree + 1)^(dim - 1).
3296 const unsigned int dofs_per_face =
Utilities::pow(fe_degree + 1, dim - 1);
// scratch_data begins behind the first 3 * n_components * dofs_per_face
// entries of temp.
3299 VectorizedArrayType *scratch_data =
3300 temp + 3 * n_components * dofs_per_face;
// Loop over all lanes of the SIMD array; its body is not visible here.
3310 for (
unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
// Two integrate_in_face calls; the template arguments that distinguish them
// are on lines not visible here.
3354 VectorizedArrayType>::
3355 integrate_in_face(n_components,
3369 VectorizedArrayType>::
3370 integrate_in_face(n_components,
// Two calls with identical visible arguments, passing temp together with
// dst_ptr / sm_ptr. The callee name and the condition selecting between them
// are not visible -- presumably fe_face_evaluation_process_and_io with
// different template arguments; TODO confirm against the full file.
3386 p, n_components, integration_flag, dst_ptr, sm_ptr, fe_eval, temp);
3389 p, n_components, integration_flag, dst_ptr, sm_ptr, fe_eval, temp);
// Integrate-side callback set.
// NOTE(review): extracted listing -- the struct header, the function name
// lines, most parameters and the statements that consume val / grad are not
// visible. The callback names given below are inferred from the parameter
// lists and should be confirmed against the full file.
3395 template <
int fe_degree>
// Presumably hermite_grad_vectorized: forms the value contribution
// val = temp_1 - grad_weight * temp_2 and the gradient contribution
// grad = grad_weight * temp_2 as SIMD arrays.
3405 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4>
3411 const T4 &grad_weight)
3414 const VectorizedArrayType val = temp_1 - grad_weight * temp_2;
3415 const VectorizedArrayType grad = grad_weight * temp_2;
// Callback with two template parameters; its signature and body are not
// visible here.
3420 template <
typename T0,
typename T1>
// Presumably hermite_grad_vectorized_indexed: same val / grad formulas, with
// additional index arguments indices_1 / indices_2.
3428 template <
typename T0,
typename T1,
typename T2,
typename T3>
3434 const T2 &grad_weight,
3435 const T3 &indices_1,
3436 const T3 &indices_2)
3439 const VectorizedArrayType val = temp_1 - grad_weight * temp_2;
3440 const VectorizedArrayType grad = grad_weight * temp_2;
// Callback with three template parameters; its signature and body are not
// visible here.
3445 template <
typename T0,
typename T1,
typename T2>
// Presumably the scalar hermite_grad: same formulas on single values of type
// Number.
3453 template <
typename T0,
typename T1,
typename T2>
3459 const T2 &grad_weight)
3462 const Number val = temp_1 - grad_weight * temp_2;
3463 const Number grad = grad_weight * temp_2;
// Last visible callback of this set; only its template header is visible.
3468 template <
typename T0,
typename T1>
std::uint8_t get_face_no(const unsigned int v=0) const
internal::MatrixFreeFunctions::DoFInfo::DoFAccessIndex get_dof_access_index() const
ScalarNumber shape_info_number_type
const ShapeInfoType & get_shape_info() const
const std::array< unsigned int, n_lanes > & get_cell_ids() const
const Number * begin_gradients() const
unsigned int get_subface_index() const
bool is_interior_face() const
ArrayView< Number > get_scratch_data() const
const Number * begin_values() const
std::uint8_t get_face_orientation(const unsigned int v=0) const
const Number * begin_hessians() const
void gather(const Number *base_ptr, const unsigned int *offsets)
void load(const OtherNumber *ptr)
#define DEAL_II_ALWAYS_INLINE
#define DEAL_II_OPENMP_SIMD_PRAGMA
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
#define DEAL_II_ASSERT_UNREACHABLE()
#define DEAL_II_NOT_IMPLEMENTED()
static ::ExceptionBase & ExcNotImplemented()
#define Assert(cond, exc)
#define AssertDimension(dim1, dim2)
#define AssertIndexRange(index, range)
static ::ExceptionBase & ExcInternalError()
#define AssertThrow(cond, exc)
@ tensor_symmetric_no_collocation
@ tensor_symmetric_hermite
EvaluationFlags
The EvaluationFlags enum.
constexpr T fixed_power(const T t)
constexpr T pow(const T base, const int iexp)
void do_vectorized_add(const VectorizedArrayType src, Number2 *dst_ptr)
void embed_truncated_into_full_tensor_product(const unsigned int n_components, Number *values_dofs, const Number *values_dofs_actual, FEEvaluationData< dim, Number, is_face > &fe_eval)
constexpr bool use_collocation_evaluation(const unsigned int fe_degree, const unsigned int n_q_points_1d)
void adjust_for_face_orientation_per_lane(const unsigned int dim, const unsigned int n_components, const unsigned int v, const EvaluationFlags::EvaluationFlags flag, const unsigned int *orientation, const bool integrate, const std::size_t n_q_points, Number *tmp_values, VectorizedArrayType *values_quad, VectorizedArrayType *gradients_quad=nullptr, VectorizedArrayType *hessians_quad=nullptr)
void fe_face_evaluation_process_and_io(Processor &proc, const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, typename Processor::Number2_ *global_vector_ptr, const std::vector< ArrayView< const typename Processor::Number2_ > > *sm_ptr, const EvaluationData &fe_eval, typename Processor::VectorizedArrayType_ *temp1)
std::enable_if_t<(variant==evaluate_general), void > apply_matrix_vector_product(const Number2 *matrix, const Number *in, Number *out)
void do_vectorized_scatter_add(const VectorizedArrayType src, const unsigned int *indices, Number2 *dst_ptr)
void do_vectorized_gather(const Number2 *src_ptr, const unsigned int *indices, VectorizedArrayType &dst)
void do_vectorized_read(const Number2 *src_ptr, VectorizedArrayType &dst)
std::enable_if_t< contract_onto_face, void > interpolate_to_face(const Number2 *shape_values, const std::array< int, 2 > &n_blocks, const std::array< int, 2 > &steps, const Number *input, Number *DEAL_II_RESTRICT output, const int n_rows_runtime=0, const int stride_runtime=1)
void truncate_tensor_product_to_complete_degrees(const unsigned int n_components, Number *values_dofs_actual, const Number *values_dofs, FEEvaluationData< dim, Number, is_face > &fe_eval)
void adjust_for_face_orientation(const unsigned int dim, const unsigned int n_components, const EvaluationFlags::EvaluationFlags flag, const unsigned int *orientation, const bool integrate, const std::size_t n_q_points, Number *tmp_values, Number *values_quad, Number *gradients_quad, Number *hessians_quad)
constexpr unsigned int invalid_unsigned_int
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
static constexpr unsigned int max_children_per_cell
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs, FEEvaluationData< dim, Number, true > &fe_eval, const bool sum_into_values)
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, FEEvaluationData< dim, Number, true > &fe_eval)
static void evaluate_in_face(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, FEEvaluationData< dim, Number, true > &fe_eval, Number *temp, Number *scratch_data)
static void project_to_face(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs, FEEvaluationData< dim, Number, true > &fe_eval, const bool use_vectorization, Number *temp, Number *scratch_data)
static bool evaluate_tensor_none(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs, FEEvaluationData< dim, Number, true > &fe_eval)
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs, FEEvaluationData< dim, Number, true > &fe_eval)
static void adjust_quadrature_for_face_orientation(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, FEEvaluationData< dim, Number, true > &fe_eval, const bool use_vectorization, Number *temp)
static bool evaluate_tensor(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs_actual, FEEvaluationData< dim, Number, true > &fe_eval)
void hermite_grad(T0 &temp_1, T0 &temp_2, const T1 &src_ptr_1, const T1 &src_ptr_2, const T2 &grad_weight)
static const int fe_degree_
static const bool do_integrate
void value_vectorized(T1 &temp, const T2 src_ptr)
void value(T1 &temp, const T2 &src_ptr)
VectorizedArrayType VectorizedArrayType_
void value_vectorized_indexed(T0 &temp, const T1 src_ptr, const T2 &indices)
void hermite_grad_vectorized(T0 &temp_1, T0 &temp_2, const T1 src_ptr_1, const T1 src_ptr_2, const T2 &grad_weight)
void hermite_grad_vectorized_indexed(T0 &temp_1, T0 &temp_2, const T1 src_ptr_1, const T1 src_ptr_2, const T2 &grad_weight, const T3 &indices_1, const T3 &indices_2)
typename VectorizedArrayType::value_type Number
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number2 *src_ptr, const std::vector< ArrayView< const Number2 > > *sm_ptr, FEEvaluationData< dim, VectorizedArrayType, true > &fe_eval)
static bool supports(const EvaluationFlags::EvaluationFlags evaluation_flag, const MatrixFreeFunctions::ShapeInfo< Number3 > &shape_info, const Number2 *vector_ptr, MatrixFreeFunctions::DoFInfo::IndexStorageVariants storage)
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, FEEvaluationData< dim, Number, true > &fe_eval)
static const int fe_degree_
void value(const T0 &temp, T1 &dst_ptr)
void hermite_grad_vectorized(const T0 &temp_1, const T1 &temp_2, T2 dst_ptr_1, T3 dst_ptr_2, const T4 &grad_weight)
void hermite_grad_vectorized_indexed(const T0 &temp_1, const T0 &temp_2, T1 dst_ptr_1, T1 dst_ptr_2, const T2 &grad_weight, const T3 &indices_1, const T3 &indices_2)
static const bool do_integrate
VectorizedArrayType VectorizedArrayType_
void hermite_grad(const T0 &temp_1, const T0 &temp_2, T1 &dst_ptr_1, T1 &dst_ptr_2, const T2 &grad_weight)
void value_vectorized(const T0 &temp, T1 dst_ptr)
void value_vectorized_indexed(const T0 &temp, T1 dst_ptr, const T2 &indices)
typename VectorizedArrayType::value_type Number
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number2 *dst_ptr, const std::vector< ArrayView< const Number2 > > *sm_ptr, FEEvaluationData< dim, VectorizedArrayType, true > &fe_eval)
static bool integrate_tensor(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs_actual, FEEvaluationData< dim, Number, true > &fe_eval, const bool sum_into_values)
static bool integrate_tensor_none(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs, FEEvaluationData< dim, Number, true > &fe_eval, const bool sum_into_values)
static void collect_from_face(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs, FEEvaluationData< dim, Number, true > &fe_eval, const bool use_vectorization, const Number *temp, Number *scratch_data, const bool sum_into_values)
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs, FEEvaluationData< dim, Number, true > &fe_eval, const bool sum_into_values)
static void integrate_in_face(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, FEEvaluationData< dim, Number, true > &fe_eval, Number *temp, Number *scratch_data)
static void adjust_quadrature_for_face_orientation(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, FEEvaluationData< dim, Number, true > &fe_eval, const bool use_vectorization, Number *temp)
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs, FEEvaluationData< dim, Number, true > &fe_eval)
typename FEEvaluationData< dim, Number, true >::shape_info_number_type Number2
static void evaluate_or_integrate_in_face(const EvaluationFlags::EvaluationFlags evaluation_flag, const std::vector< MatrixFreeFunctions::UnivariateShapeData< Number2 > > &shape_data, Number *values_dofs_in, Number *values, Number *gradients, Number *scratch_data, const unsigned int subface_index, const unsigned int face_direction)
typename FEEvaluationData< dim, Number, true >::shape_info_number_type Number2
EvaluatorTensorProduct< symmetric_evaluate ? evaluate_evenodd : evaluate_general, dim - 1, fe_degree+1, n_q_points_1d, Number, Number2 > Eval
static void evaluate_in_face(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const MatrixFreeFunctions::UnivariateShapeData< Number2 > &data, Number *values_dofs, Number *values_quad, Number *gradients_quad, Number *hessians_quad, Number *scratch_data, const unsigned int subface_index)
static Eval create_evaluator_tensor_product(const MatrixFreeFunctions::UnivariateShapeData< Number2 > &data, const unsigned int subface_index, const unsigned int direction)
static void integrate_in_face(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, const MatrixFreeFunctions::UnivariateShapeData< Number2 > &data, Number *values_dofs, Number *values_quad, Number *gradients_quad, Number *hessians_quad, Number *scratch_data, const unsigned int subface_index)
typename FEEvaluationData< dim, Number, true >::shape_info_number_type Number2
static void interpolate_quadrature(const unsigned int n_components, const EvaluationFlags::EvaluationFlags flags, const MatrixFreeFunctions::ShapeInfo< Number2 > &shape_info, const Number *input, Number *output, const unsigned int face_no)
static void interpolate_generic(const unsigned int n_components, const Number *input, Number *output, const EvaluationFlags::EvaluationFlags flag, const unsigned int face_no, const unsigned int n_points_1d, const std::array< AlignedVector< Number2 >, 2 > &shape_data, const unsigned int dofs_per_component_on_cell, const unsigned int dofs_per_component_on_face)
static void interpolate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags flags, const MatrixFreeFunctions::ShapeInfo< Number2 > &shape_info, const Number *input, Number *output, const unsigned int face_no)
static void interpolate_raviart_thomas(const unsigned int n_components, const Number *input, Number *output, const EvaluationFlags::EvaluationFlags flag, const unsigned int face_no, const MatrixFreeFunctions::ShapeInfo< Number2 > &shape_info)
unsigned int n_q_points_face
std::vector< UnivariateShapeData< Number > > data
AlignedVector< Number > shape_values
AlignedVector< Number > shape_values_eo
AlignedVector< Number > shape_hessians_eo
AlignedVector< Number > shape_gradients_collocation_eo
unsigned int n_q_points_1d
AlignedVector< Number > shape_gradients_eo
AlignedVector< Number > shape_hessians
AlignedVector< Number > shape_gradients
std::array< AlignedVector< Number >, 2 > hessians_within_subface
std::array< AlignedVector< Number >, 2 > values_within_subface
std::array< AlignedVector< Number >, 2 > gradients_within_subface