35 #include <Eigen/Dense>
36 #include <Eigen/Eigenvalues>
62 int* ipiv =
new int[n];
74 for (
Index i = 0; i < n; i++) {
105 assert(column_stride == 1);
106 assert(vec_stride == 1);
115 for (
Index i = 0; i < n; i++) {
116 ipiv[i] = (int)indx[i];
121 &trans, &n_int, &one, LU.
mdata, &n_int, ipiv, rhs, &n_int, &info);
123 for (
Index i = 0; i < n; i++) {
142 assert(n == A.
nrows());
143 assert(n ==
x.nelem());
144 assert(n == b.
nelem());
175 int* ipiv =
new int[n];
187 work =
new double[lwork];
188 }
catch (std::bad_alloc& ba) {
190 "Error inverting matrix: Could not allocate workspace memory.");
201 throw runtime_error(
"Error inverting matrix: Matrix not of full rank.");
213 int n_int = int(n), lwork = n_int, info;
214 std::vector<int> ipiv(n);
223 throw runtime_error(
"Error inverting matrix: Matrix not of full rank.");
252 assert(n == A.
nrows());
253 assert(n == WR.
nelem());
254 assert(n == WI.
nelem());
255 assert(n == P.
nrows());
256 assert(n == P.
ncols());
264 int LDA, LDA_L, LDA_R, n_int, info;
269 char l_eig =
'N', r_eig =
'V';
272 int lwork = 2 * n_int + n_int * n_int;
273 double *work, *rwork;
275 rwork =
new double[2 * n_int];
276 work =
new double[lwork];
277 }
catch (std::bad_alloc& ba) {
278 throw std::runtime_error(
279 "Error diagonalizing: Could not allocate workspace memory.");
283 double* adata = A_tmp.
mdata;
284 double* rpdata = P2.
mdata;
285 double* lpdata =
new double[0];
286 double* wrdata = WR2.
mdata;
287 double* widata = WI2.
mdata;
312 for (
Index i = 0; i < n; i++)
313 for (
Index j = 0; j < n; j++) P(j, i) = P2(i, j);
336 assert(n == A.
nrows());
337 assert(n == W.
nelem());
338 assert(n == P.
nrows());
339 assert(n == P.
ncols());
347 char l_eig =
'N', r_eig =
'V';
350 int lwork = 2 * n_int + n_int * n_int;
371 for (
Index i = 0; i < n; i++)
372 for (
Index j = 0; j <= i; j++)
400 Matrix D(n, n),
N(n, n), X(n, n), cX(n, n, 0.0), B(n, n, 0.0);
401 Vector N_col_vec(n, 0.), F_col_vec(n, 0.);
406 j = 1 + floor(1. / log(2.) * log(A_norm_inf));
424 for (
Index k = 0; k <
q; k++) {
426 c *= (q_n - k_n + 1) / ((2 * q_n - k_n + 1) * k_n);
444 for (
Index i = 0; i < n; i++) {
446 lubacksub(F_col_vec, X, N_col_vec, indx);
447 F(
joker, i) = F_col_vec;
451 for (
Index k = 0; k < j_index; k++) {
464 Matrix P(n, n), invP(n, n);
470 for (
Index k = 0; k < n; k++) {
472 throw std::runtime_error(
473 "We require real eigenvalues for your chosen method.");
474 P(
joker, k) *= exp(WR[k]);
488 const Numeric e = 1.0 + floor(1.0 / log(2.0) * log(A_norm_inf));
489 const Index r = (e + 1.) > 0. ? (
Index)(e + 1.) : 0;
494 Eigen::Matrix4d X =
M;
495 Eigen::Matrix4d cX = c * X;
503 for (
Index k = 2; k <=
q; k++) {
506 cX.noalias() = c * X;
517 for (
Index k = 1; k <= r; k++) F *= F;
526 Eigen::EigenSolver<Eigen::Matrix4d> es;
529 const Eigen::Matrix4cd U = es.eigenvectors();
530 const Eigen::Vector4cd
q = es.eigenvalues();
531 const Eigen::Vector4cd eq =
q.array().exp();
532 Eigen::Matrix4cd res = U * eq.asDiagonal();
573 assert(n_partials == dF_upp.
npages());
574 assert(n_partials == dF_low.
npages());
575 assert(n_partials == dA_low.
npages());
576 for (
Index ii = 0; ii < n_partials; ii++) {
586 e = 1. + floor(1. / log(2.) * log(A_norm_inf));
587 Index r = (e + 1.) > 0. ? (
Index)(e + 1.) : 0;
592 Matrix M = A, X(n, n), cX(n, n), D(n, n);
603 Tensor3 dM_upp(n_partials, n, n), dM_low(n_partials, n, n),
604 dD_upp(n_partials, n, n), dD_low(n_partials, n, n),
605 Y_upp(n_partials, n, n), Y_low(n_partials, n, n),
606 cY_upp(n_partials, n, n), cY_low(n_partials, n, n);
607 for (
Index ii = 0; ii < n_partials; ii++) {
608 for (
Index jj = 0; jj < n; jj++) {
609 for (
Index kk = 0; kk < n; kk++) {
610 dM_upp(ii, jj, kk) = dA_upp(ii, jj, kk) * pow2rm1;
611 dM_low(ii, jj, kk) = dA_low(ii, jj, kk) * pow2rm1;
613 Y_upp(ii, jj, kk) = dM_upp(ii, jj, kk);
614 Y_low(ii, jj, kk) = dM_low(ii, jj, kk);
616 cY_upp(ii, jj, kk) = c * Y_upp(ii, jj, kk);
617 cY_low(ii, jj, kk) = c * Y_low(ii, jj, kk);
619 dF_upp(ii, jj, kk) = cY_upp(ii, jj, kk);
620 dF_low(ii, jj, kk) = cY_low(ii, jj, kk);
622 dD_upp(ii, jj, kk) = -cY_upp(ii, jj, kk);
623 dD_low(ii, jj, kk) = -cY_low(ii, jj, kk);
629 Matrix tmp1(n, n), tmp2(n, n);
631 for (
Index k = 2; k <=
q; k++) {
635 for (
Index ii = 0; ii < n_partials; ii++) {
636 Matrix tmp_low1(n, n), tmp_upp1(n, n), tmp_low2(n, n), tmp_upp2(n, n);
644 for (
Index jj = 0; jj < n; jj++) {
645 for (
Index kk = 0; kk < n; kk++) {
646 Y_upp(ii, jj, kk) = tmp_upp1(jj, kk) + tmp_upp2(jj, kk);
647 Y_low(ii, jj, kk) = tmp_low1(jj, kk) + tmp_low2(jj, kk);
650 cY_upp(ii, jj, kk) = c * Y_upp(ii, jj, kk);
651 cY_low(ii, jj, kk) = c * Y_low(ii, jj, kk);
654 dF_upp(ii, jj, kk) += cY_upp(ii, jj, kk);
655 dF_low(ii, jj, kk) += cY_low(ii, jj, kk);
660 dD_upp(ii, jj, kk) += cY_upp(ii, jj, kk);
661 dD_low(ii, jj, kk) += cY_low(ii, jj, kk);
664 dD_upp(ii, jj, kk) -= cY_upp(ii, jj, kk);
665 dD_low(ii, jj, kk) -= cY_low(ii, jj, kk);
674 for (
Index jj = 0; jj < n; jj++) {
675 for (
Index kk = 0; kk < n; kk++) {
676 X(jj, kk) = tmp1(jj, kk);
677 cX(jj, kk) = tmp1(jj, kk) * c;
678 F(jj, kk) += cX(jj, kk);
681 D(jj, kk) += cX(jj, kk);
683 D(jj, kk) -= cX(jj, kk);
696 for (
Index ii = 0; ii < n_partials; ii++) {
697 Matrix tmp_low(n, n), tmp_upp(n, n);
702 for (
Index jj = 0; jj < n; jj++) {
703 for (
Index kk = 0; kk < n; kk++) {
704 dF_upp(ii, jj, kk) -= tmp_upp(jj, kk);
705 dF_low(ii, jj, kk) -= tmp_low(jj, kk);
712 for (
Index jj = 0; jj < n; jj++) {
713 for (
Index kk = 0; kk < n; kk++) {
714 dF_upp(ii, jj, kk) = tmp_upp(jj, kk);
715 dF_low(ii, jj, kk) = tmp_low(jj, kk);
720 for (
Index k = 1; k <= r; k++) {
722 for (
Index ii = 0; ii < n_partials; ii++) {
723 Matrix tmp_low1(n, n), tmp_upp1(n, n), tmp_low2(n, n), tmp_upp2(n, n);
731 for (
Index jj = 0; jj < n; jj++) {
732 for (
Index kk = 0; kk < n; kk++) {
733 dF_upp(ii, jj, kk) = tmp_upp1(jj, kk) + tmp_upp2(jj, kk);
734 dF_low(ii, jj, kk) = tmp_low1(jj, kk) + tmp_low2(jj, kk);
793 const Numeric a = A(0, 0), b = A(0, 1), c = A(0, 2), d = A(0, 3), u = A(1, 2),
794 v = A(1, 3),
w = A(2, 3);
796 const Numeric b2 = b * b, c2 = c * c, d2 = d * d, u2 = u * u, v2 = v * v,
799 const Numeric Const2 =
b2 + c2 + d2 - u2 - v2 - w2;
802 Const1 =
b2 * (
b2 * 0.5 + c2 + d2 - u2 - v2 + w2);
803 Const1 += c2 * (c2 * 0.5 + d2 - u2 + v2 - w2);
804 Const1 += d2 * (d2 * 0.5 + u2 - v2 - w2);
805 Const1 += u2 * (u2 * 0.5 + v2 + w2);
806 Const1 += v2 * (v2 * 0.5 + w2);
808 Const1 += 8 * (b * d * u *
w - b * c * v *
w - c * d * u * v);
812 Const1 =
sqrt(Const1);
819 F(0, 0) = F(1, 1) = F(2, 2) = F(3, 3) = exp_a;
825 const Numeric x = sqrt_BpA.real() * sqrt_05;
826 const Numeric y = sqrt_BmA.imag() * sqrt_05;
834 const Numeric inv_x2y2 = 1.0 / x2y2;
837 Numeric inv_y = 0.0, inv_x = 0.0;
844 C2 = (1.0 - cos_y) * inv_x2y2;
845 C3 = (1.0 - sin_y * inv_y) * inv_x2y2;
846 }
else if (
y == 0.0) {
850 C2 = (cosh_x - 1.0) * inv_x2y2;
851 C3 = (sinh_x * inv_x - 1.0) * inv_x2y2;
856 C0 = (cos_y *
x2 + cosh_x * y2) * inv_x2y2;
857 C1 = (sin_y *
x2 * inv_y + sinh_x * y2 * inv_x) * inv_x2y2;
858 C2 = (cosh_x - cos_y) * inv_x2y2;
859 C3 = (sinh_x * inv_x - sin_y * inv_y) * inv_x2y2;
862 F(0, 0) = F(1, 1) = F(2, 2) = F(3, 3) = C0;
864 F(0, 0) += C2 * (
b2 + c2 + d2);
866 F(1, 1) += C2 * (
b2 - u2 - v2);
868 F(2, 2) += C2 * (c2 - u2 - w2);
870 F(3, 3) += C2 * (d2 - v2 - w2);
872 F(0, 1) = F(1, 0) = C1 * b;
875 C2 * (-c * u - d * v) +
876 C3 * (b * (
b2 + c2 + d2) - u * (b * u - d *
w) - v * (b * v + c *
w));
879 C2 * (c * u + d * v) +
880 C3 * (-b * (-
b2 + u2 + v2) + c * (b * c - v *
w) + d * (b * d + u *
w));
882 F(0, 2) = F(2, 0) = C1 * c;
885 C2 * (b * u - d *
w) +
886 C3 * (c * (
b2 + c2 + d2) - u * (c * u + d * v) -
w * (b * v + c *
w));
889 C2 * (-b * u + d *
w) +
890 C3 * (b * (b * c - v *
w) - c * (-c2 + u2 + w2) + d * (c * d - u * v));
892 F(0, 3) = F(3, 0) = C1 * d;
895 C2 * (b * v + c *
w) +
896 C3 * (d * (
b2 + c2 + d2) - v * (c * u + d * v) +
w * (b * u - d *
w));
899 C2 * (-b * v - c *
w) +
900 C3 * (b * (b * d + u *
w) + c * (c * d - u * v) - d * (-d2 + v2 + w2));
902 F(1, 2) = F(2, 1) = C2 * (b * c - v *
w);
904 F(1, 2) += C1 * u + C3 * (c * (c * u + d * v) - u * (-
b2 + u2 + v2) -
905 w * (b * d + u *
w));
907 F(2, 1) += -C1 * u + C3 * (-b * (b * u - d *
w) + u * (-c2 + u2 + w2) -
908 v * (c * d - u * v));
910 F(1, 3) = F(3, 1) = C2 * (b * d + u *
w);
912 F(1, 3) += C1 * v + C3 * (d * (c * u + d * v) - v * (-
b2 + u2 + v2) +
913 w * (b * c - v *
w));
915 F(3, 1) += -C1 * v + C3 * (-b * (b * v + c *
w) - u * (c * d - u * v) +
916 v * (-d2 + v2 + w2));
918 F(2, 3) = F(3, 2) = C2 * (c * d - u * v);
920 F(2, 3) += C1 *
w + C3 * (-d * (b * u - d *
w) + v * (b * c - v *
w) -
921 w * (-c2 + u2 + w2));
923 F(3, 2) += -C1 *
w + C3 * (-c * (b * v + c *
w) + u * (b * d + u *
w) +
924 w * (-d2 + v2 + w2));
971 eigA(0, 0) = eigA(1, 1) = eigA(2, 2) = eigA(3, 3) = 0.0;
973 Eigen::Matrix4d eigA2 = eigA;
975 Eigen::Matrix4d eigA3 = eigA2;
980 const Numeric a = A(0, 0), b = A(0, 1), c = A(0, 2), d = A(0, 3), u = A(1, 2),
981 v = A(1, 3),
w = A(2, 3);
983 const Numeric b2 = b * b, c2 = c * c, d2 = d * d, u2 = u * u, v2 = v * v,
986 const Numeric Const2 =
b2 + c2 + d2 - u2 - v2 - w2;
989 Const1 =
b2 * (
b2 * 0.5 + c2 + d2 - u2 - v2 + w2);
990 Const1 += c2 * (c2 * 0.5 + d2 - u2 + v2 - w2);
991 Const1 += d2 * (d2 * 0.5 + u2 - v2 - w2);
992 Const1 += u2 * (u2 * 0.5 + v2 + w2);
993 Const1 += v2 * (v2 * 0.5 + w2);
995 Const1 += 8 * (b * d * u *
w - b * c * v *
w - c * d * u * v);
999 Const1 =
sqrt(Const1);
1006 F(0, 0) = F(1, 1) = F(2, 2) = F(3, 3) = exp_a;
1012 const Numeric x = sqrt_BpA.real() * sqrt_05;
1013 const Numeric y = sqrt_BmA.imag() * sqrt_05;
1021 const Numeric inv_x2y2 = 1.0 / x2y2;
1024 Numeric inv_y = 0.0, inv_x = 0.0;
1031 C2 = (1.0 - cos_y) * inv_x2y2;
1032 C3 = (1.0 - sin_y * inv_y) * inv_x2y2;
1033 }
else if (
y == 0.0) {
1037 C2 = (cosh_x - 1.0) * inv_x2y2;
1038 C3 = (sinh_x * inv_x - 1.0) * inv_x2y2;
1043 C0 = (cos_y *
x2 + cosh_x * y2) * inv_x2y2;
1044 C1 = (sin_y *
x2 * inv_y + sinh_x * y2 * inv_x) * inv_x2y2;
1045 C2 = (cosh_x - cos_y) * inv_x2y2;
1046 C3 = (sinh_x * inv_x - sin_y * inv_y) * inv_x2y2;
1049 eigF.noalias() = C1 * eigA + C2 * eigA2 + C3 * eigA3;
1121 const Numeric a = A(0, 0), b = A(0, 1), c = A(0, 2), d = A(0, 3), u = A(1, 2),
1122 v = A(1, 3),
w = A(2, 3);
1124 const Numeric b2 = b * b, c2 = c * c, d2 = d * d, u2 = u * u, v2 = v * v,
1127 const Numeric Const2 =
b2 + c2 + d2 - u2 - v2 - w2;
1130 Const1 =
b2 * (
b2 * 0.5 + c2 + d2 - u2 - v2 + w2);
1131 Const1 += c2 * (c2 * 0.5 + d2 - u2 + v2 - w2);
1132 Const1 += d2 * (d2 * 0.5 + u2 - v2 - w2);
1133 Const1 += u2 * (u2 * 0.5 + v2 + w2);
1134 Const1 += v2 * (v2 * 0.5 + w2);
1136 Const1 += 8 * (b * d * u *
w - b * c * v *
w - c * d * u * v);
1140 Const1 =
sqrt(Const1);
1147 F(0, 0) = F(1, 1) = F(2, 2) = F(3, 3) = exp_a;
1148 for (
Index i = 0; i < npd; i++) {
1158 const Numeric x = sqrt_BpA.real() * sqrt_05;
1159 const Numeric y = sqrt_BmA.imag() * sqrt_05;
1167 const Numeric inv_x2y2 = 1.0 / x2y2;
1170 Numeric inv_y = 0.0, inv_x = 0.0;
1177 C2 = (1.0 - cos_y) * inv_x2y2;
1178 C3 = (1.0 - sin_y * inv_y) * inv_x2y2;
1179 }
else if (
y == 0.0) {
1183 C2 = (cosh_x - 1.0) * inv_x2y2;
1184 C3 = (sinh_x * inv_x - 1.0) * inv_x2y2;
1189 C0 = (cos_y *
x2 + cosh_x * y2) * inv_x2y2;
1190 C1 = (sin_y *
x2 * inv_y + sinh_x * y2 * inv_x) * inv_x2y2;
1191 C2 = (cosh_x - cos_y) * inv_x2y2;
1192 C3 = (sinh_x * inv_x - sin_y * inv_y) * inv_x2y2;
1195 F(0, 0) = F(1, 1) = F(2, 2) = F(3, 3) = C0;
1197 F(0, 0) += C2 * (
b2 + c2 + d2);
1199 F(1, 1) += C2 * (
b2 - u2 - v2);
1201 F(2, 2) += C2 * (c2 - u2 - w2);
1203 F(3, 3) += C2 * (d2 - v2 - w2);
1205 F(0, 1) = F(1, 0) = C1 * b;
1208 C2 * (-c * u - d * v) +
1209 C3 * (b * (
b2 + c2 + d2) - u * (b * u - d *
w) - v * (b * v + c *
w));
1212 C2 * (c * u + d * v) +
1213 C3 * (-b * (-
b2 + u2 + v2) + c * (b * c - v *
w) + d * (b * d + u *
w));
1215 F(0, 2) = F(2, 0) = C1 * c;
1218 C2 * (b * u - d *
w) +
1219 C3 * (c * (
b2 + c2 + d2) - u * (c * u + d * v) -
w * (b * v + c *
w));
1222 C2 * (-b * u + d *
w) +
1223 C3 * (b * (b * c - v *
w) - c * (-c2 + u2 + w2) + d * (c * d - u * v));
1225 F(0, 3) = F(3, 0) = C1 * d;
1228 C2 * (b * v + c *
w) +
1229 C3 * (d * (
b2 + c2 + d2) - v * (c * u + d * v) +
w * (b * u - d *
w));
1232 C2 * (-b * v - c *
w) +
1233 C3 * (b * (b * d + u *
w) + c * (c * d - u * v) - d * (-d2 + v2 + w2));
1235 F(1, 2) = F(2, 1) = C2 * (b * c - v *
w);
1237 F(1, 2) += C1 * u + C3 * (c * (c * u + d * v) - u * (-
b2 + u2 + v2) -
1238 w * (b * d + u *
w));
1240 F(2, 1) += -C1 * u + C3 * (-b * (b * u - d *
w) + u * (-c2 + u2 + w2) -
1241 v * (c * d - u * v));
1243 F(1, 3) = F(3, 1) = C2 * (b * d + u *
w);
1245 F(1, 3) += C1 * v + C3 * (d * (c * u + d * v) - v * (-
b2 + u2 + v2) +
1246 w * (b * c - v *
w));
1248 F(3, 1) += -C1 * v + C3 * (-b * (b * v + c *
w) - u * (c * d - u * v) +
1249 v * (-d2 + v2 + w2));
1251 F(2, 3) = F(3, 2) = C2 * (c * d - u * v);
1253 F(2, 3) += C1 *
w + C3 * (-d * (b * u - d *
w) + v * (b * c - v *
w) -
1254 w * (-c2 + u2 + w2));
1256 F(3, 2) += -C1 *
w + C3 * (-c * (b * v + c *
w) + u * (b * d + u *
w) +
1257 w * (-d2 + v2 + w2));
1262 const Numeric inv_x2 = inv_x * inv_x;
1263 const Numeric inv_y2 = inv_y * inv_y;
1265 for (
Index upp_or_low = 0; upp_or_low < 2; upp_or_low++) {
1266 for (
Index i = 0; i < npd; i++) {
1272 const Numeric da = dA(0, 0), db = dA(0, 1), dc = dA(0, 2),
1273 dd = dA(0, 3), du = dA(1, 2), dv = dA(1, 3),
1276 const Numeric db2 = 2 * db * b, dc2 = 2 * dc * c, dd2 = 2 * dd * d,
1277 du2 = 2 * du * u, dv2 = 2 * dv * v, dw2 = 2 *
dw *
w;
1279 const Numeric dConst2 = db2 + dc2 + dd2 - du2 - dv2 - dw2;
1283 dConst1 = db2 * (
b2 * 0.5 + c2 + d2 - u2 - v2 + w2);
1284 dConst1 +=
b2 * (db2 * 0.5 + dc2 + dd2 - du2 - dv2 + dw2);
1286 dConst1 += dc2 * (c2 * 0.5 + d2 - u2 + v2 - w2);
1287 dConst1 += c2 * (dc2 * 0.5 + dd2 - du2 + dv2 - dw2);
1289 dConst1 += dd2 * (d2 * 0.5 + u2 - v2 - w2);
1290 dConst1 += d2 * (dd2 * 0.5 + du2 - dv2 - dw2);
1292 dConst1 += du2 * (u2 * 0.5 + v2 + w2);
1293 dConst1 += u2 * (du2 * 0.5 + dv2 + dw2);
1295 dConst1 += dv2 * (v2 * 0.5 + w2);
1296 dConst1 += v2 * (dv2 * 0.5 + dw2);
1298 dConst1 += 4 * ((db * d * u *
w - db * c * v *
w - dc * d * u * v +
1299 b * dd * u *
w - b * dc * v *
w - c * dd * u * v +
1300 b * d * du *
w - b * c * dv *
w - c * d * du * v +
1301 b * d * u *
dw - b * c * v *
dw - c * d * u * dv));
1302 dConst1 += dw2 * w2;
1310 (0.5 * (dConst2 - dConst1) / sqrt_BmA).imag() * sqrt_05;
1314 dC2 = -2 *
y * dy * C2 * inv_x2y2 + dy * sin_y * inv_x2y2;
1315 dC3 = -2 *
y * dy * C3 * inv_x2y2 +
1316 (dy * sin_y * inv_y2 - cos_y * dy * inv_y) * inv_x2y2;
1318 }
else if (
y == 0.0) {
1320 (0.5 * (dConst2 + dConst1) / sqrt_BpA).real() * sqrt_05;
1324 dC2 = -2 *
x *
dx * C2 * inv_x2y2 +
dx * sinh_x * inv_x2y2;
1325 dC3 = -2 *
x *
dx * C3 * inv_x2y2 +
1326 (cosh_x *
dx * inv_x -
dx * sinh_x * inv_x2) * inv_x2y2;
1329 (0.5 * (dConst2 + dConst1) / sqrt_BpA).real() * sqrt_05;
1331 (0.5 * (dConst2 - dConst1) / sqrt_BmA).imag() * sqrt_05;
1334 const Numeric dx2dy2 = dx2 + dy2;
1336 dC0 = -dx2dy2 * C0 * inv_x2y2 +
1337 (2 * cos_y *
dx *
x + 2 * cosh_x * dy *
y +
dx * sinh_x * y2 -
1341 dC1 = -dx2dy2 * C1 * inv_x2y2 +
1342 (cos_y * dy *
x2 * inv_y + dx2 * sin_y * inv_y -
1343 dy * sin_y *
x2 * inv_y2 -
dx * sinh_x * y2 * inv_x2 +
1344 cosh_x *
dx * y2 * inv_x + dy2 * sinh_x * inv_x) *
1347 dC2 = -dx2dy2 * C2 * inv_x2y2 + (
dx * sinh_x + dy * sin_y) * inv_x2y2;
1349 dC3 = -dx2dy2 * C3 * inv_x2y2 +
1350 (dy * sin_y * inv_y2 - cos_y * dy * inv_y +
1351 cosh_x *
dx * inv_x -
dx * sinh_x * inv_x2) *
1355 dF(0, 0) = dF(1, 1) = dF(2, 2) = dF(3, 3) = dC0;
1357 dF(0, 0) += dC2 * (
b2 + c2 + d2) + C2 * (db2 + dc2 + dd2);
1359 dF(1, 1) += dC2 * (
b2 - u2 - v2) + C2 * (db2 - du2 - dv2);
1361 dF(2, 2) += dC2 * (c2 - u2 - w2) + C2 * (dc2 - du2 - dw2);
1363 dF(3, 3) += dC2 * (d2 - v2 - w2) + C2 * (dd2 - dv2 - dw2);
1365 dF(0, 1) = dF(1, 0) = db * C1 + b * dC1;
1367 dF(0, 1) += dC2 * (-c * u - d * v) +
1368 C2 * (-dc * u - dd * v - c * du - d * dv) +
1369 dC3 * (b * (
b2 + c2 + d2) - u * (b * u - d *
w) -
1370 v * (b * v + c *
w)) +
1371 C3 * (db * (
b2 + c2 + d2) - du * (b * u - d *
w) -
1372 dv * (b * v + c *
w) + b * (db2 + dc2 + dd2) -
1373 u * (db * u - dd *
w) - v * (db * v + dc *
w) -
1374 u * (b * du - d *
dw) - v * (b * dv + c *
dw));
1375 dF(1, 0) += dC2 * (c * u + d * v) +
1376 C2 * (dc * u + dd * v + c * du + d * dv) +
1377 dC3 * (-b * (-
b2 + u2 + v2) + c * (b * c - v *
w) +
1378 d * (b * d + u *
w)) +
1379 C3 * (-db * (-
b2 + u2 + v2) + dc * (b * c - v *
w) +
1380 dd * (b * d + u *
w) - b * (-db2 + du2 + dv2) +
1381 c * (db * c - dv *
w) + d * (db * d + du *
w) +
1382 c * (b * dc - v *
dw) + d * (b * dd + u *
dw));
1384 dF(0, 2) = dF(2, 0) = dC1 * c + C1 * dc;
1386 dF(0, 2) += dC2 * (b * u - d *
w) +
1387 C2 * (db * u - dd *
w + b * du - d *
dw) +
1388 dC3 * (c * (
b2 + c2 + d2) - u * (c * u + d * v) -
1389 w * (b * v + c *
w)) +
1390 C3 * (dc * (
b2 + c2 + d2) - du * (c * u + d * v) -
1391 dw * (b * v + c *
w) + c * (db2 + dc2 + dd2) -
1392 u * (dc * u + dd * v) -
w * (db * v + dc *
w) -
1393 u * (c * du + d * dv) -
w * (b * dv + c *
dw));
1394 dF(2, 0) += dC2 * (-b * u + d *
w) +
1395 C2 * (-db * u + dd *
w - b * du + d *
dw) +
1396 dC3 * (b * (b * c - v *
w) - c * (-c2 + u2 + w2) +
1397 d * (c * d - u * v)) +
1398 C3 * (db * (b * c - v *
w) - dc * (-c2 + u2 + w2) +
1399 dd * (c * d - u * v) + b * (db * c - dv *
w) -
1400 c * (-dc2 + du2 + dw2) + d * (dc * d - du * v) +
1401 b * (b * dc - v *
dw) + d * (c * dd - u * dv));
1403 dF(0, 3) = dF(3, 0) = dC1 * d + C1 * dd;
1405 dF(0, 3) += dC2 * (b * v + c *
w) +
1406 C2 * (db * v + dc *
w + b * dv + c *
dw) +
1407 dC3 * (d * (
b2 + c2 + d2) - v * (c * u + d * v) +
1408 w * (b * u - d *
w)) +
1409 C3 * (dd * (
b2 + c2 + d2) - dv * (c * u + d * v) +
1410 dw * (b * u - d *
w) + d * (db2 + dc2 + dd2) -
1411 v * (dc * u + dd * v) +
w * (db * u - dd *
w) -
1412 v * (c * du + d * dv) +
w * (b * du - d *
dw));
1413 dF(3, 0) += dC2 * (-b * v - c *
w) +
1414 C2 * (-db * v - dc *
w - b * dv - c *
dw) +
1415 dC3 * (b * (b * d + u *
w) + c * (c * d - u * v) -
1416 d * (-d2 + v2 + w2)) +
1417 C3 * (db * (b * d + u *
w) + dc * (c * d - u * v) -
1418 dd * (-d2 + v2 + w2) + b * (db * d + du *
w) +
1419 c * (dc * d - du * v) - d * (-dd2 + dv2 + dw2) +
1420 b * (b * dd + u *
dw) + c * (c * dd - u * dv));
1422 dF(1, 2) = dF(2, 1) =
1423 dC2 * (b * c - v *
w) + C2 * (db * c + b * dc - dv *
w - v *
dw);
1425 dF(1, 2) += dC1 * u + C1 * du +
1426 dC3 * (c * (c * u + d * v) - u * (-
b2 + u2 + v2) -
1427 w * (b * d + u *
w)) +
1428 C3 * (dc * (c * u + d * v) - du * (-
b2 + u2 + v2) -
1429 dw * (b * d + u *
w) + c * (dc * u + dd * v) -
1430 u * (-db2 + du2 + dv2) -
w * (db * d + du *
w) +
1431 c * (c * du + d * dv) -
w * (b * dd + u *
dw));
1432 dF(2, 1) += -dC1 * u - C1 * du +
1433 dC3 * (-b * (b * u - d *
w) + u * (-c2 + u2 + w2) -
1434 v * (c * d - u * v)) +
1435 C3 * (-db * (b * u - d *
w) + du * (-c2 + u2 + w2) -
1436 dv * (c * d - u * v) - b * (db * u - dd *
w) +
1437 u * (-dc2 + du2 + dw2) - v * (dc * d - du * v) -
1438 b * (b * du - d *
dw) - v * (c * dd - u * dv));
1440 dF(1, 3) = dF(3, 1) =
1441 dC2 * (b * d + u *
w) + C2 * (db * d + b * dd + du *
w + u *
dw);
1443 dF(1, 3) += dC1 * v + C1 * dv +
1444 dC3 * (d * (c * u + d * v) - v * (-
b2 + u2 + v2) +
1445 w * (b * c - v *
w)) +
1446 C3 * (dd * (c * u + d * v) - dv * (-
b2 + u2 + v2) +
1447 dw * (b * c - v *
w) + d * (dc * u + dd * v) -
1448 v * (-db2 + du2 + dv2) +
w * (db * c - dv *
w) +
1449 d * (c * du + d * dv) +
w * (b * dc - v *
dw));
1450 dF(3, 1) += -dC1 * v - C1 * dv +
1451 dC3 * (-b * (b * v + c *
w) - u * (c * d - u * v) +
1452 v * (-d2 + v2 + w2)) +
1453 C3 * (-db * (b * v + c *
w) - du * (c * d - u * v) +
1454 dv * (-d2 + v2 + w2) - b * (db * v + dc *
w) -
1455 u * (dc * d - du * v) + v * (-dd2 + dv2 + dw2) -
1456 b * (b * dv + c *
dw) - u * (c * dd - u * dv));
1458 dF(2, 3) = dF(3, 2) =
1459 dC2 * (c * d - u * v) + C2 * (dc * d + c * dd - du * v - u * dv);
1461 dF(2, 3) += dC1 *
w + C1 *
dw +
1462 dC3 * (-d * (b * u - d *
w) + v * (b * c - v *
w) -
1463 w * (-c2 + u2 + w2)) +
1464 C3 * (-dd * (b * u - d *
w) + dv * (b * c - v *
w) -
1465 dw * (-c2 + u2 + w2) - d * (db * u - dd *
w) +
1466 v * (db * c - dv *
w) -
w * (-dc2 + du2 + dw2) -
1467 d * (b * du - d *
dw) + v * (b * dc - v *
dw));
1468 dF(3, 2) += -dC1 *
w - C1 *
dw +
1469 dC3 * (-c * (b * v + c *
w) + u * (b * d + u *
w) +
1470 w * (-d2 + v2 + w2)) +
1471 C3 * (-dc * (b * v + c *
w) + du * (b * d + u *
w) +
1472 dw * (-d2 + v2 + w2) - c * (db * v + dc *
w) +
1473 u * (db * d + du *
w) +
w * (-dd2 + dv2 + dw2) -
1474 c * (b * dv + c *
dw) + u * (b * dd + u *
dw));
1479 dF(0, 0) += F(0, 0) * da;
1480 dF(0, 1) += F(0, 1) * da;
1481 dF(0, 2) += F(0, 2) * da;
1482 dF(0, 3) += F(0, 3) * da;
1483 dF(1, 0) += F(1, 0) * da;
1484 dF(1, 1) += F(1, 1) * da;
1485 dF(1, 2) += F(1, 2) * da;
1486 dF(1, 3) += F(1, 3) * da;
1487 dF(2, 0) += F(2, 0) * da;
1488 dF(2, 1) += F(2, 1) * da;
1489 dF(2, 2) += F(2, 2) * da;
1490 dF(2, 3) += F(2, 3) * da;
1491 dF(3, 0) += F(3, 0) * da;
1492 dF(3, 1) += F(3, 1) * da;
1493 dF(3, 2) += F(3, 2) * da;
1494 dF(3, 3) += F(3, 3) * da;
1563 eigA(0, 0) = eigA(1, 1) = eigA(2, 2) = eigA(3, 3) = 0.0;
1565 Eigen::Matrix4d eigA2 = eigA;
1567 Eigen::Matrix4d eigA3 = eigA2;
1573 const Numeric a = A(0, 0), b = A(0, 1), c = A(0, 2), d = A(0, 3), u = A(1, 2),
1574 v = A(1, 3),
w = A(2, 3);
1576 const Numeric b2 = b * b, c2 = c * c, d2 = d * d, u2 = u * u, v2 = v * v,
1579 const Numeric Const2 =
b2 + c2 + d2 - u2 - v2 - w2;
1582 Const1 =
b2 * (
b2 * 0.5 + c2 + d2 - u2 - v2 + w2);
1583 Const1 += c2 * (c2 * 0.5 + d2 - u2 + v2 - w2);
1584 Const1 += d2 * (d2 * 0.5 + u2 - v2 - w2);
1585 Const1 += u2 * (u2 * 0.5 + v2 + w2);
1586 Const1 += v2 * (v2 * 0.5 + w2);
1588 Const1 += 8 * (b * d * u *
w - b * c * v *
w - c * d * u * v);
1592 Const1 =
sqrt(Const1);
1599 F(0, 0) = F(1, 1) = F(2, 2) = F(3, 3) = exp_a;
1600 for (
Index i = 0; i < npd; i++) {
1610 const Numeric x = sqrt_BpA.real() * sqrt_05;
1611 const Numeric y = sqrt_BmA.imag() * sqrt_05;
1619 const Numeric inv_x2y2 = 1.0 / x2y2;
1622 Numeric inv_y = 0.0, inv_x = 0.0;
1629 C2 = (1.0 - cos_y) * inv_x2y2;
1630 C3 = (1.0 - sin_y * inv_y) * inv_x2y2;
1631 }
else if (
y == 0.0) {
1635 C2 = (cosh_x - 1.0) * inv_x2y2;
1636 C3 = (sinh_x * inv_x - 1.0) * inv_x2y2;
1641 C0 = (cos_y *
x2 + cosh_x * y2) * inv_x2y2;
1642 C1 = (sin_y *
x2 * inv_y + sinh_x * y2 * inv_x) * inv_x2y2;
1643 C2 = (cosh_x - cos_y) * inv_x2y2;
1644 C3 = (sinh_x * inv_x - sin_y * inv_y) * inv_x2y2;
1647 eigF(0, 0) = eigF(1, 1) = eigF(2, 2) = eigF(3, 3) = C0;
1648 eigF.noalias() = C1 * eigA + C2 * eigA2 + C3 * eigA3;
1656 const Numeric inv_x2 = inv_x * inv_x;
1657 const Numeric inv_y2 = inv_y * inv_y;
1659 for (
Index upp_or_low = 0; upp_or_low < 2; upp_or_low++) {
1660 for (
Index i = 0; i < npd; i++) {
1667 const Numeric da = dA(0, 0), db = dA(0, 1), dc = dA(0, 2),
1668 dd = dA(0, 3), du = dA(1, 2), dv = dA(1, 3),
1671 dA(0, 0) = dA(1, 1) = dA(2, 2) = dA(3, 3) = 0.0;
1673 Eigen::Matrix4d dA2;
1675 dA2.noalias() = eigA * dA;
1676 dA2.noalias() += dA * eigA;
1678 Eigen::Matrix4d dA3;
1679 dA3.noalias() = dA2 * eigA;
1680 dA3.noalias() += eigA2 * dA;
1682 const Numeric db2 = 2 * db * b, dc2 = 2 * dc * c, dd2 = 2 * dd * d,
1683 du2 = 2 * du * u, dv2 = 2 * dv * v, dw2 = 2 *
dw *
w;
1685 const Numeric dConst2 = db2 + dc2 + dd2 - du2 - dv2 - dw2;
1689 dConst1 = db2 * (
b2 * 0.5 + c2 + d2 - u2 - v2 + w2);
1690 dConst1 +=
b2 * (db2 * 0.5 + dc2 + dd2 - du2 - dv2 + dw2);
1692 dConst1 += dc2 * (c2 * 0.5 + d2 - u2 + v2 - w2);
1693 dConst1 += c2 * (dc2 * 0.5 + dd2 - du2 + dv2 - dw2);
1695 dConst1 += dd2 * (d2 * 0.5 + u2 - v2 - w2);
1696 dConst1 += d2 * (dd2 * 0.5 + du2 - dv2 - dw2);
1698 dConst1 += du2 * (u2 * 0.5 + v2 + w2);
1699 dConst1 += u2 * (du2 * 0.5 + dv2 + dw2);
1701 dConst1 += dv2 * (v2 * 0.5 + w2);
1702 dConst1 += v2 * (dv2 * 0.5 + dw2);
1704 dConst1 += 4 * ((db * d * u *
w - db * c * v *
w - dc * d * u * v +
1705 b * dd * u *
w - b * dc * v *
w - c * dd * u * v +
1706 b * d * du *
w - b * c * dv *
w - c * d * du * v +
1707 b * d * u *
dw - b * c * v *
dw - c * d * u * dv));
1708 dConst1 += dw2 * w2;
1715 (0.5 * (dConst2 - dConst1) / sqrt_BmA).imag() * sqrt_05;
1718 -2 *
y * dy * C2 * inv_x2y2 + dy * sin_y * inv_x2y2;
1720 -2 *
y * dy * C3 * inv_x2y2 +
1721 (dy * sin_y * inv_y2 - cos_y * dy * inv_y) * inv_x2y2;
1723 dF.noalias() = dC2 * eigA2;
1724 +C2* dA2 + dC3* eigA3 + C3* dA3;
1726 dF.noalias() += eigF * da;
1727 }
else if (
y == 0.0) {
1729 (0.5 * (dConst2 + dConst1) / sqrt_BpA).real() * sqrt_05;
1732 -2 *
x *
dx * C2 * inv_x2y2 +
dx * sinh_x * inv_x2y2;
1734 -2 *
x *
dx * C3 * inv_x2y2 +
1735 (cosh_x *
dx * inv_x -
dx * sinh_x * inv_x2) * inv_x2y2;
1737 dF.noalias() = dC2 * eigA2 + C2 * dA2 + dC3 * eigA3 + C3 * dA3;
1739 dF.noalias() += eigF * da;
1742 (0.5 * (dConst2 + dConst1) / sqrt_BpA).real() * sqrt_05;
1744 (0.5 * (dConst2 - dConst1) / sqrt_BmA).imag() * sqrt_05;
1747 const Numeric dx2dy2 = dx2 + dy2;
1749 const Numeric dC0 = -dx2dy2 * C0 * inv_x2y2 +
1750 (2 * cos_y *
dx *
x + 2 * cosh_x * dy *
y +
1751 dx * sinh_x * y2 - dy * sin_y *
x2) *
1755 -dx2dy2 * C1 * inv_x2y2 +
1756 (cos_y * dy *
x2 * inv_y + dx2 * sin_y * inv_y -
1757 dy * sin_y *
x2 * inv_y2 -
dx * sinh_x * y2 * inv_x2 +
1758 cosh_x *
dx * y2 * inv_x + dy2 * sinh_x * inv_x) *
1762 -dx2dy2 * C2 * inv_x2y2 + (
dx * sinh_x + dy * sin_y) * inv_x2y2;
1764 const Numeric dC3 = -dx2dy2 * C3 * inv_x2y2 +
1765 (dy * sin_y * inv_y2 - cos_y * dy * inv_y +
1766 cosh_x *
dx * inv_x -
dx * sinh_x * inv_x2) *
1769 dF.noalias() = dC1 * eigA + C1 * dA + dC2 * eigA2 + C2 * dA2 +
1770 dC3 * eigA3 + C3 * dA3;
1776 dF.noalias() += eigF * da;
1795 assert(n_partials == dF_upp.
npages());
1796 assert(n_partials == dF_low.
npages());
1797 assert(n_partials == dA_low.
npages());
1798 for (
Index ii = 0; ii < n_partials; ii++) {
1807 const Numeric e = 1.0 + floor(1.0 / log(2.0) * log(A_norm_inf));
1808 const Index r = (e + 1.) > 0. ? (
Index)(e + 1.) : 0;
1815 Eigen::Matrix4d X =
M;
1816 Eigen::Matrix4d cX = c * X;
1820 eigF.noalias() += cX;
1829 dFu.reserve(n_partials);
1830 dFl.reserve(n_partials);
1832 for (
Index i = 0; i < n_partials; i++) {
1839 cYu[i].noalias() = c * dMu[i];
1840 cYl[i].noalias() = c * dMl[i];
1852 for (
Index k = 2; k <=
q; k++) {
1854 for (
Index i = 0; i < n_partials; i++) {
1855 Yu[i] = dMu[i] * X +
M * Yu[i];
1856 Yl[i] = dMl[i] * X +
M * Yl[i];
1858 cYu[i].noalias() = c * Yu[i];
1859 cYl[i].noalias() = c * Yl[i];
1861 dFu[i].noalias() += cYu[i];
1862 dFl[i].noalias() += cYl[i];
1866 cX.noalias() = c * X;
1867 eigF.noalias() += cX;
1872 for (
Index i = 0; i < n_partials; i++) {
1873 dDu[i].noalias() += cYu[i];
1874 dDl[i].noalias() += cYl[i];
1879 for (
Index i = 0; i < n_partials; i++) {
1880 dDu[i].noalias() -= cYu[i];
1881 dDl[i].noalias() -= cYl[i];
1887 const Eigen::Matrix4d invD = D.inverse();
1890 for (
Index i = 0; i < n_partials; i++) {
1891 dFu[i] = invD * (dFu[i] - dDu[i] * eigF);
1892 dFl[i] = invD * (dFl[i] - dDl[i] * eigF);
1895 for (
Index k = 1; k <= r; k++) {
1896 for (
Index i = 0; i < n_partials; i++) {
1897 dFu[i] = dFu[i] * eigF + eigF * dFu[i];
1898 dFl[i] = dFl[i] * eigF + eigF * dFl[i];
1938 assert(n_partials == dF.
npages());
1939 for (
Index ii = 0; ii < n_partials; ii++) {
1947 e = 1. + floor(1. / log(2.) * log(A_norm_inf));
1948 Index r = (e + 1.) > 0. ? (
Index)(e + 1.) : 0;
1953 Matrix M = A, X(n, n), cX(n, n), D(n, n);
1964 Tensor3 dM(n_partials, n, n), Y(n_partials, n, n), cY(n_partials, n, n),
1965 dD(n_partials, n, n);
1966 for (
Index ii = 0; ii < n_partials; ii++) {
1967 for (
Index jj = 0; jj < n; jj++) {
1968 for (
Index kk = 0; kk < n; kk++) {
1969 dM(ii, jj, kk) = dA(ii, jj, kk) * pow2rm1;
1971 Y(ii, jj, kk) =
dM(ii, jj, kk);
1973 cY(ii, jj, kk) = c * Y(ii, jj, kk);
1975 dF(ii, jj, kk) = cY(ii, jj, kk);
1977 dD(ii, jj, kk) = -cY(ii, jj, kk);
1984 Matrix tmp1(n, n), tmp2(n, n);
1986 for (
Index k = 2; k <=
q; k++) {
1990 for (
Index ii = 0; ii < n_partials; ii++) {
1995 for (
Index jj = 0; jj < n; jj++)
1996 for (
Index kk = 0; kk < n; kk++) {
1997 Y(ii, jj, kk) = tmp1(jj, kk) + tmp2(jj, kk);
2000 cY(ii, jj, kk) = c * Y(ii, jj, kk);
2003 dF(ii, jj, kk) += cY(ii, jj, kk);
2008 dD(ii, jj, kk) += cY(ii, jj, kk);
2011 dD(ii, jj, kk) -= cY(ii, jj, kk);
2038 mult(tmp2, tmp1, F);
2042 for (
Index ii = 0; ii < n_partials; ii++) {
2050 for (
Index k = 1; k <= r; k++) {
2051 for (
Index ii = 0; ii < n_partials; ii++) {
2103 e = 1. + floor(1. / log(2.) * log(A_norm_inf));
2104 Index r = (e + 1.) > 0. ? (
Index)(e + 1.) : 0;
2124 Matrix D(n, n), dD(n, n);
2142 Matrix tmp1(n, n), tmp2(n, n);
2144 for (
Index k = 2; k <=
q; k++) {
2192 mult(tmp2, tmp1, F);
2198 mult(tmp2, tmp1, dF);
2201 for (
Index k = 1; k <= r; k++) {
2229 for (
Index i = 0; i < A.
ncols(); i++) row_sum +=
abs(A(i, j));
2242 assert(n == I.
nrows());
2245 for (
Index i = 0; i < n; i++) I(i, i) = 1.;
2258 assert(dim == A.
ncols());
2261 return A(0, 0) * A(1, 1) * A(2, 2) + A(0, 1) * A(1, 2) * A(2, 0) +
2262 A(0, 2) * A(1, 0) * A(2, 1) - A(0, 2) * A(1, 1) * A(2, 0) -
2263 A(0, 1) * A(1, 0) * A(2, 2) - A(0, 0) * A(1, 2) * A(2, 1);
2265 return A(0, 0) * A(1, 1) - A(0, 1) * A(1, 0);
2271 for (
Index j = 0; j < dim; j++) {
2273 for (
Index I = 1; I < dim; I++)
2274 for (
Index J = 0; J < dim; J++) {
2276 temp(I - 1, J) = A(I, J);
2278 temp(I - 1, J - 1) = A(I, J);
2283 ret_val += ((j % 2 == 0) ? -1. : 1.) * tempNum * A(0, j);
2303 const Index n =
x.nelem();
2305 assert(
y.nelem() == n);
2333 Numeric s1 = 0, xm = 0, s3 = 0, s4 = 0;
2335 for (
Index i = 0; i < n; i++) {
2339 for (
Index i = 0; i < n; i++) {
2347 p[0] = s3 /
Numeric(n) - p[1] * xm;
2352 const Index n =
x.nelem();
2373 Eigen::ComplexEigenSolver<Eigen::MatrixXcd>
eig(
const Eigen::Ref<Eigen::MatrixXcd> A)
2375 Eigen::ComplexEigenSolver<Eigen::MatrixXcd> ces;
2376 return ces.compute(A);