CalcEigRealSymm3.m

function varargout = CalcEigRealSymm3(A11,A22,A33,A12,A13,A23,nval,nvec)
% CALCEIGREALSYMM3 Eigenvalues and -vectors for symmetric real matrices
% [...] = CalcEigRealSymm3(A11,A22,A33,A12,A13,A23,NVal,NVec)
%
% NOTE: This function is mostly meant for internal use by EIGREALSYMM3 and
% STRUCTURETENSOREIG3.
%
% INPUT
% A11,A22,A33,A12,A13,A23 - N x M x L arrays with matrix elements
% NVal                    - Return N smallest eigenvalues
% NVec                    - Return eigenvectors for N smallest eigenvalues
%
% OUTPUT
% [...] - NVal arrays with eigenvalues followed by 3*NVec arrays with
%         eigenvector componentes (in order X1,Y1,Z1,X2,Y2,Z2,X3,Y3,Z3).
%         All arrays have size N x M x L.
%
% See also EIGREALSYMM3 STRUCTURETENSOREIG3
%
% Patrick M. Jensen, 2019, ROCKWOOL

% Implements the algorithm in:
%
% Scherzinger, W. M; Dohrman, C. R
% A robust algorithm for finding the eigenvalues and eigenvectors of 3 x 3
% symmetric matrices
% Comput. Methods Appl. Mech. Engrg., 2008
% DOI: 10.1016/j.cma.2008.03.031

% NOTE: This code is optimized for speed, *not* readability. To understand
% each step, please read the above paper first!

% Save original size to reshape at the end
origsiz = size(A11);

varargout = cell(1,nval+3*nvec);

% Reshape all inputs to 1D arrays
A11 = reshape(A11,[],1);
A22 = reshape(A22,[],1);
A33 = reshape(A33,[],1);
A12 = reshape(A12,[],1);
A13 = reshape(A13,[],1);
A23 = reshape(A23,[],1);

siz = size(A11);

% Convert to deviatoric matrix
trA = (A11 + A22 + A33)/3;
A11 = A11 - trA;
A22 = A22 - trA;
A33 = A33 - trA;

% Find the most distinct eigenvalue
Eta1 = MostDistEigVal(A11,A22,A33,A12,A13,A23);

% Find the most distinct eigenvector v1, and s1 and s2
[V1,S1,S2] = MostDistEigBasis(A11,A22,A33,A12,A13,A23,Eta1);

% Find the other eigenvalues
[Eta2,Eta3] = OtherEigVals(A11,A22,A33,A12,A13,A23,S1,S2);

% Find the other eigenvectors
W1 = ComputeW1(A11,A22,A33,A12,A13,A23,S1,S2,Eta2);
clear S1 S2;
V2 = RowCross(V1,W1);
clear W1;
V2 = bsxfun(@rdivide, V2, sqrt(RowNorms2(V2)));
V3 = RowCross(V2,V1);
% Get the smallest eigenvalue and corresponding eigencectors
% NOTE: Since we know there are 3 eigenvalues, this is way of sorting is
% faster than calling the sort function.
AllEta = [Eta1,Eta2,Eta3];
Es = zeros(size(AllEta),'like',AllEta);
Idx = zeros(size(AllEta),'like',AllEta);
[Es(:,1),Idx(:,1)] = min(AllEta,[],2);
[Es(:,3),Idx(:,3)] = max(AllEta,[],2);
Idx(:,3) = Idx(:,3) + (Es(:,1) == Es(:,3));
Idx(:,2) = 6-Idx(:,1)-Idx(:,3);
for i = 1:3
    Es(Idx(:,2) == i,2) = AllEta(Idx(:,2) == i,i);
end

clear Eta2 Eta3;
Es = Es + repmat(trA,1,3);
for i = 1:nval
    varargout{i} = reshape(Es(:,i),origsiz);
end

for i = 1:nvec
    Xi = zeros(siz,'like',A11);
    Yi = zeros(siz,'like',A11);
    Zi = zeros(siz,'like',A11);
    
    Xi(Idx(:,i) == 1) = V1(Idx(:,i) == 1,1);    
    Xi(Idx(:,i) == 2) = V2(Idx(:,i) == 2,1);
    Xi(Idx(:,i) == 3) = V3(Idx(:,i) == 3,1);
    Xi(Eta1 == 0) = 1;
    
    Yi(Idx(:,i) == 1) = V1(Idx(:,i) == 1,2);
    Yi(Idx(:,i) == 2) = V2(Idx(:,i) == 2,2);
    Yi(Idx(:,i) == 3) = V3(Idx(:,i) == 3,2);
    Yi(Eta1 == 0) = 0;
    
    Zi(Idx(:,i) == 1) = V1(Idx(:,i) == 1,3);
    Zi(Idx(:,i) == 2) = V2(Idx(:,i) == 2,3);
    Zi(Idx(:,i) == 3) = V3(Idx(:,i) == 3,3);
    Zi(Eta1 == 0) = 0;
    
    varargout{nval+(i-1)*3+1} = reshape(Xi,origsiz);
    varargout{nval+(i-1)*3+2} = reshape(Yi,origsiz);
    varargout{nval+(i-1)*3+3} = reshape(Zi,origsiz);
end
end

function W1 = ComputeW1(A11,A22,A33,A12,A13,A23,S1,S2,Eta2)
% Helper function which returns w1

% Save the rows of A - Eta2*I
AEta1 = [A11 - Eta2, A12, A13];
AEta2 = [A12, A22 - Eta2, A23];
AEta3 = [A13, A23, A33 - Eta2];

% Compute u1 and u2, select the largest and normalize
U1 = [dot(AEta1,S1,2), dot(AEta2,S1,2), dot(AEta3,S1,2)];
U2 = [dot(AEta1,S2,2), dot(AEta2,S2,2), dot(AEta3,S2,2)];
clear AEta1 AEta2 AEta3;
[LU1s, LIdx] = max([RowNorms2(U1), RowNorms2(U2)],[],2);

% We use bsxfun instead of logical indexing because it is faster. The
% reason is likely that this way we don't indexing (which means branching)
% and everything is the same operations (which is especially good on GPUs).
W1 = bsxfun(@times, U1, LIdx == 1);
W1 = W1 + bsxfun(@times, U2, LIdx == 2);
clear U1 U2;
W1 = bsxfun(@rdivide, W1, sqrt(LU1s));
% In case both u1 and u2 were zero vectors they were both eigenvectors
% for eta2, so returning s1 will make the rest of the algorithm work
W1(abs(LU1s) < eps,:) = S1(abs(LU1s) < eps,:);
end

function [Eta2, Eta3] = OtherEigVals(A11,A22,A33,A12,A13,A23,S1,S2)
% Helper function which returns the other eigenvalues eta2 and eta3

[Ap22,Ap33,Ap23] = SBasisLowRight(A11,A22,A33,A12,A13,A23,S1,S2);
D = Ap22 - Ap33;
trAp = Ap22 + Ap33;
SgnD = -1 + 2 * (D > 0);
Eta2 = 0.5*(trAp - SgnD.*sqrt(D.^2 + 4*Ap23.^2));
Eta3 = trAp - Eta2;
end

function [Ap11,Ap22,Ap12] = SBasisLowRight(A11,A22,A33,A12,A13,A23,S1,S2)
% Helper function which returns the lower right 2x2 part of the deviatoric
% matrix in the (v1,s1,s2) basis

% Save the rows of A
A1 = [A11,A12,A13];
A2 = [A12,A22,A23];
A3 = [A13,A23,A33];

% Compute matrix components
Ap11 = dot(S1, [dot(A1,S1,2), dot(A2,S1,2), dot(A3,S1,2)],2);
Ap22 = dot(S2, [dot(A1,S2,2), dot(A2,S2,2), dot(A3,S2,2)],2);
Ap12 = dot(S1, [dot(A1,S2,2), dot(A2,S2,2), dot(A3,S2,2)],2);
end

function [V1,S1,S2] = MostDistEigBasis(A11,A22,A33,A12,A13,A23,Eta)
% Helper function which returns the most distinct eigenvector, s1, and s2

% Compute the basis vector projections
R1 = [A11 - Eta, A12, A13];
R2 = [A12, A22 - Eta, A23];
R3 = [A13, A23, A33 - Eta];

% Pick the largest r vector and make sorted vectors
[LR1s, RIdx] = max([RowNorms2(R1),RowNorms2(R2),RowNorms2(R3)],[],2);

% We use bsxfun instead of logical indexing because it is faster. The
% reason is likely that this way we don't indexing (which means branching)
% and everything is the same operations (which is especially good on GPUs).
R1s = bsxfun(@times, R1, RIdx == 1);
R1s = R1s + bsxfun(@times, R2, RIdx == 2);
R1s = R1s + bsxfun(@times, R3, RIdx == 3);

R2s = bsxfun(@times, R1, RIdx == 2);
R2s = R2s + bsxfun(@times, R2, RIdx ~= 2);

R3s = bsxfun(@times, R1, RIdx == 3);
R3s = R3s + bsxfun(@times, R3, RIdx ~= 3);

clear R1 R2 R3 RIdx;

% Compute the s1, t1 and t2 vectors
S1 = bsxfun(@rdivide, R1s, sqrt(LR1s));
clear LR1s;
T2 = R2s - bsxfun(@times, dot(S1,R2s,2), S1);
T3 = R3s - bsxfun(@times, dot(S1,R3s,2), S1);

% Select the largest of the t1 and t2 vectors, normalize and compute the
% v1 vectors
[LT2s, TIdx] = max([RowNorms2(T2),RowNorms2(T3)],[],2);

% We use bsxfun instead of logical indexing because it is faster. The
% reason is likely that this way we don't indexing (which means branching)
% and everything is the same operations (which is especially good on GPUs).
S2 = bsxfun(@times, T2, TIdx == 1);
S2 = S2 + bsxfun(@times, T3, TIdx == 2);
S2 = bsxfun(@rdivide, S2, sqrt(LT2s));

V1 = RowCross(S1,S2);
end

function Eta = MostDistEigVal(A11,A22,A33,A12,A13,A23)
% Helper function which returns the most distinct eigenvalue

P = sqrt((A11.^2 + A22.^2 + A33.^2 + 2*(A12.^2 + A13.^2 + A23.^2))/6);
J3Scaled = 0.5*Det3(A11./P,A22./P,A33./P,A12./P,A13./P,A23./P);

% Small numerical errors might cause the argument for acos to have an
% abslute value greater than 1, which results in a complex result.
% Therefore, to avoid this, we just truncate it prior to computation.
Alpha = acos(max(-1, min(1, J3Scaled)))/3;
clear J3Scaled;

Idx = Alpha > pi/6;
Eta = 2*P.*cos(Alpha + Idx.*2*pi/3);
Eta(isnan(Eta)) = 0;
end

function D = Det3(A11,A22,A33,A12,A13,A23)
% Helper function which returns the determinant of a 3x3 symetric matrix

D = A11.*A22.*A33 - A11.*A23.^2 - A33.*A12.^2 + 2*A12.*A13.*A23 - ...
    A13.^2.*A22;
end

function V = RowCross(V1,V2)
% Helper function which returns cross product of vectors stacked in rows

% For some reason, it is *much* faster to this manually, even though it is
% the same as the cross function
V = [V1(:,2).*V2(:,3) - V1(:,3).*V2(:,2),...
     V1(:,3).*V2(:,1) - V1(:,1).*V2(:,3),...
     V1(:,1).*V2(:,2) - V1(:,2).*V2(:,1)];
end

function N = RowNorms2(V)
% Helper function which returns the square norms of vectors stacked in rows

N = sum(V.^2,2);
end