Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Python code for sorting real Schur forms based on the original work of Jan Brandts http://dx.doi.org/10.1002/nla.274
# Python version of the following original work:
# Title: Sorting Real Schur Forms
# Author: Jan Brandts
# E-Mail: brandts-AT-science.uva.nl
# http://m2matlabdb.ma.tum.de/download.jsp?MC_ID=3&MP_ID=119
# http://dx.doi.org/10.1002/nla.274
# Institution: University of Amsterdam
# Description: In Matlab 6, there exists a command to generate a real Schur form, wheras another transforms a real
# Schur form into a complex one. There do not exist commands to prescribe the order in which the eigenvalues appear on
# the diagonal of the upper (quasi-) triangular factor T.
# For the complex case, a routine is sketched in Golub and Van Loan (1996), that orders the diagonal of T according to
# their distance to a target value. In the reference below, we give a Matlab routine to sort real Schur forms in Matlab.
# It is based on a block-swapping procedure by Bai and Demmel (1993).
#
# Sorting real Schur forms, both partially and completely, has important applications in the computation of real
# invariant subspaces.
# J.H. Brandts
# Matlab code for sorted real Schur forms
# Numerical Linear Algebra with Applications 9(3):249-261 (2002)
# Keywords: Real Schur Form, sorting, Bai-Demmel algorithm, swapping
# Based on the original Matlab File Version: 1.0
import numpy as np
expensive_asserts = False
def sort_real_schur(Q, R, z, b, inplace=False):
r'''
:param Q: np.array((N, N))
orthogonal real Q such that AQ=QR
:param R: np.array((N, N))
quasi-triangular real R such that AQ=QR
:param z: complex
target z in the complex plane
if z==float('inf'), order eigenvalues decreasingly by magnitude
:param b: float
determines the length of the ordering with respect to z to be produced
* if b < 0 then -b blocks will be sorted,
* if b > 0 then b or b+1 eigenvalues will be sorted, depending on the sizes of the blocks,
* if b = 0 then the whole Schur form will be sorted.
:return: Q, R, ap
* Q, R : orthogonal real Q and quasi-triangular real R such that AQ=QR with
the diagonal blocks ordered with respect to the target z. The number of
ordered blocks/eigenvalues is determined by the parameter b.
* A vector ap warns for inaccuracy of the solution if an entry of ap exceeds one.
'''
eps = np.finfo(R.dtype).eps
if not np.all(np.abs(np.tril(R, -2)) <= 100*eps):
raise ValueError('R is not block-triangular')
if not inplace:
Q = Q.copy()
R = R.copy()
r = np.where(np.abs(np.diag(R, -1)) > 100*eps)[0] # detect subdiagonal nonzero entries
s = [i for i in range(R.shape[0] + 1) if not i in r + 1] # construct from them a vector s with the-top left positions of each block
p = [None]*(len(s) - 1) # will hold the eigenvalues
for k in range(1, len(s) - 1): # debug
assert R[s[k], s[k] - 1] <= 100*eps # debug
for k in range(len(s) - 1): # ranging over all blocks
sk = s[k]
if s[k + 1] - sk == 2: # if the block is 2x2
Q, R = normalize(Q, R, slice(sk, s[k + 1]), inplace=True) # normalize it
p[k] = R[sk, sk] + np.lib.scimath.sqrt(R[sk + 1, sk]*R[sk, sk + 1]) # store the eigenvalues
else: # (the one with the positive imaginary part is sufficient)
assert s[k + 1] - sk == 1 # debug
p[k] = R[s[k], s[k]] # if the block is 1x1, only store the eigenvalue
ap = []
for k in swaplist(p, s, z, b): # For k ranging over all neighbor-swaps
assert k + 2 < len(s) # debug
v = list(range(s[k], s[k + 1])) # collect the coordinates of the blocks
w = list(range(s[k + 1], s[k + 2]))
assert v[0]!=w[0] # debug
if len(v) == 2: assert v[0] < v[1] # debug
if len(w) == 2: assert w[0] < w[1] # debug
if __debug__ and expensive_asserts: # debug: check that we are moving the larger eigenvalues to the left (expensive test)
if v[0] < w[0]: # debug
arr = [p[k], p[k + 1]] # debug
_, which = select(arr, z) # debug
assert which==1 # debug
else: # debug
arr = [p[k + 1], p[k]] # debug
_, which = select(arr, z) # debug
assert which==1 # debug
vw = v + w
nrA = np.linalg.norm(R[vw, :][:, vw], ord=np.inf) # compute norm of the matrix A from (6)
Q, R = swap(Q, R, v, w, inplace=True) # swap the blocks
p[k], p[k + 1] = p[k + 1], p[k] # debug
s[k + 1] = s[k] + s[k + 2] - s[k + 1] # update positions of blocks
v = list(range(s[k], s[k + 1])) # update block-coordinates
w = list(range(s[k + 1], s[k + 2]))
if len(v)==2: # if the first block is 2 x 2
Q, R = normalize(Q, R, v, inplace=True) # normalize it
if len(w)==2: # if the second block is 2 x 2
Q, R = normalize(Q, R, w, inplace=True) # normalize it
ap.append(np.linalg.norm(R[w, :][:, v], ord=np.inf) / (10*eps*nrA)) # measure size of bottom-left block (see p.6, Sect. 2.3)
R = R - np.tril(R, -2) # Zero the below-block entries
for k in range(1, len(s)-1): # to get a quasi-triangle again
R[s[k], s[k]-1] = 0
return Q, R, ap
#r = find(abs(diag(R,-1)) > 100*eps);
#s = 1:size(R,1)+1;
#s(r+1) = [];
#
#for k=1:length(s)-1;
# sk = s(k);
# if s(k+1)-sk == 2
# [Q,R] = normalize(Q,R,sk:s(k+1)-1);
# p(k) = R(sk,sk)+sqrt(R(sk+1,sk)*R(sk,sk+1));
# else
# p(k) = R(s(k),s(k));
# end
#end
#
#for k = swaplist(p,s,z,b);
# v = s(k):s(k+1)-1;
# w = s(k+1):s(k+2)-1;
# nrA = norm(R([v,w],[v,w]),inf);
# [Q,R] = swap(Q,R,v,w);
# s(k+1) = s(k)+s(k+2)-s(k+1);
# v = s(k):s(k+1)-1;
# w = s(k+1):s(k+2)-1;
# if length(v)==2
# [Q,R] = normalize(Q,R,v);
# end
# if length(w)==2
# [Q,R] = normalize(Q,R,w);
# end
# ap(k) = norm(R(w,v),inf)/(10*eps*nrA);
# end
#
# R = R - tril(R,-2);
# for j=2:length(s)-1; R(s(j),s(j)-1)=0; end
def normalize(U, S, v, inplace=False):
r'''Applies a Givens rotation such that the two-by-two diagonal block of S situated at diagonal positions v[0], v[1] is in standardized form.
:param U:
:param S:
:param v:
:return:
'''
Q = rot(S[v, :][:, v]) # Determine the Givens rotation needed for standardization -
if not inplace:
S = S.copy()
U = U.copy()
S[:, v] = np.dot(S[:, v], Q) # and apply it left and right to S, and right to U.
S[v, :] = np.dot(Q.T, S[v, :]) # Only rows and columns with indices in the vector v can be affected by this.
U[:, v] = np.dot(U[:, v], Q)
return U, S
# function [U,S] = normalize(U,S,v);
# n = size(S,1);
# Q = rot(S(v,v));
# S(:,v) = S(:,v)*Q;
# S(v,:) = Q'*S(v,:);
# U(:,v) = U(:,v)*Q;
def rot(X):
r'''Computes a Givens rotation needed in `normalize`
:param X:
:return:
'''
c = 1.0 # Start with the identity transformation, and if needed, change it into ...
s = 0.0
if X[0, 0] != X[1, 1]:
tau = (X[0, 1] + X[1, 0]) / (X[0, 0] - X[1, 1])
off = (tau**2 + 1)**0.5
v = [tau - off, tau + off]
w = np.argmin(np.abs(v))
c = 1.0/(1.0 + v[w]**2)**0.5 # ... the cosine and sine as given in Section 2.3.1
s = v[w]*c
Q = np.array([[c, -s], [s, c]], dtype=X.dtype)
return Q
# function Q = rot(X);
# c = 1; s = 0;
# if X(1,1)~=X(2,2);
# tau = (X(1,2)+X(2,1))/(X(1,1)-X(2,2));
# off = sqrt(tau^2+1);
# v = [tau - off, tau + off];
# [d,w] = min(abs(v));
# c = 1/sqrt(1+v(w)^2);
# s = v(w)*c;
# end
# Q = [c -s;s c];
def swaplist(p, s, z, b):
r'''Produces the list v of swaps of neighboring blocks needed to order the eigenvalues assembled in the vector v
from closest to z to farthest away from z, taking into account the parameter b.
:param p: list
list of eigenvalues (only one copy for each complex-conjugate pair)
:param s: list
list of the the-top left positions of each block
:param z: complex
target z in the complex plane
:param b: float
determines the length of the ordering with respect to z to be produced
* if b < 0 then -b blocks will be sorted,
* if b > 0 then b or b+1 eigenvalues will be sorted, depending on the sizes of the blocks,
* if b = 0 then the whole Schur form will be sorted.
:return:
'''
p_orig = p # debug
n = len(p)
p = list(p)
k = 0
v = []
srtd = 0 # Number of sorted eigenvalues.
q = list(np.diff(s)) # Compute block sizes.
q_orig = list(q) # debug
fini = False
while not fini:
_, j = select(p[k:n], z) # Determine which block will go to position k
p_j = p[k + j] # debug
p[k:n + 1] = [p[j + k]] + p[k:n] # insert this block at position k,
assert p[k]==p_j # debug
del p[j + k + 1] # and remove it from where it was taken.
if expensive_asserts and __debug__: assert np.all(sorted(p)==sorted(p_orig)) # debug
q_j = q[k + j] # debug
q[k:n + 1] = [q[j + k]] + q[k:n] # Similar for the block-sizes
assert q[k] == q_j # debug
del q[j + k + 1]
if expensive_asserts and __debug__: assert np.all(sorted(q) == sorted(q_orig)) # debug
v = v + list(range(k, j + k))[::-1] # Update the list of swaps for this block
srtd = srtd + q[k] # Update the number of sorted eigenvalues
k += 1
fini = (k >= n - 1 or k == -b or srtd == b or (srtd == b + 1 and b != 0))
return v
# function v = swaplist(p,s,z,b);
# n = length(p);
# k = 0; v = [];
# srtd = 0;
# q = diff(s);
# fini = 0;
# while ~fini
# k = k+1;
# [dum,j] = select(p(k:n),z);
# p(k:n+1) = [p(j+k-1) p(k:n)];
# p(j+k) = [];
# q(k:n+1) = [q(j+k-1) q(k:n)];
# q(j+k) = [];
# v = [v,j+k-2:-1:k];
# srtd = srtd + q(k);
# fini = (k==n-1)|(k==-b)|(srtd==b)|((srtd==b+1)&(b~=0));
# end
def select(p, z):
r'''Determined which element is next in the ordering.
:param p:
:param z:
:return:
'''
if np.isinf(z):
pos = np.argmax(np.abs(p))
return -abs(p[pos]), pos
else:
y = np.real(z) + np.abs(np.imag(z))*1j # Move target to the upper half plane.
delta = np.abs(np.array(p) - y)
pos = np.argmin(delta) # Find block closest to the target.
return delta[pos], pos
# function [val,pos] = select(p,z);
# y = real(z)+abs(imag(z))*i;
# [val pos] = min(abs(p-y));
def swap(U, S, v, w, inplace=False):
r'''Swaps the two diagonal blocks at positions symbolized by the entries of v and w.
:param U:
:param S:
:param v:
:param w:
:return:
'''
p, q = S[v, :][:, w].shape # p and q are block sizes
Ip = np.eye(p)
Iq = np.eye(q)
r = np.concatenate([S[v, w[j]] for j in range(q)]) # Vectorize right-hand side for Kronecker product formulation of the Sylvester equations (7).
K = np.kron(Iq, S[v, :][:, v]) - np.kron(S[w, :][:, w].T, Ip) # Kronecker product system matrix.
L, H, P, Q = lu_complpiv(K, overwrite=True) # LU-decomposition of this matrix.
e = np.min(np.abs(np.diag(H))) # Scaling factor to prevent overflow.
sigp = np.arange(p*q)
for k in range(p*q - 1): # Implement permutation P of the LU-decomposition PAQ=LU ...
sigp[[k, P[k]]] = sigp[[P[k], k]].copy()
r = e*r[sigp] # ... scale and permute the right-hand side.
x = np.linalg.solve(H, np.linalg.solve(L, r)) # and solve the two triangular systems.
sigq = np.arange(p*q)
for k in range(p*q - 1): # Implement permutation Q of the LU-decomposition PAQ=LU ...
sigq[[k, Q[k]]] = sigq[[Q[k], k]].copy()
x[sigq] = x.copy() # ... and permute the solution.
X = np.vstack([x[j*p:(j + 1)*p] for j in range(q)]).T # De-vectorize the solution back to a block, or, quit Kronecker formulation.
Q, R = np.linalg.qr(np.vstack((-X, e*Iq)), mode='complete') # Householder QR-decomposition of X.
vw = list(v) + list(w)
if not inplace:
S = S.copy()
U = U.copy()
S[:, vw] = np.dot(S[:, vw], Q) # Perform the actual swap by left- and right-multiplication of S by Q,
S[vw, :] = np.dot(Q.T, S[vw, :])
U[:, vw] = np.dot(U[:, vw], Q) # and, right-multiplication of U by Q
return U, S
# function [U,S] = swap(U,S,v,w);
# [p,q] = size(S(v,w)); Ip = eye(p); Iq = eye(q);
# r = [];
# for j=1:q
# r = [r;S(v,w(j))];
# end
# K = kron(Iq,S(v,v))-kron(S(w,w)',Ip);
# [L,H,P,Q] = lu_complpiv(K);
# e = min(abs(diag(H)));
# sigp = 1:p*q;
# for k = 1:p*q-1;
# sigp([k,P(k)]) = sigp([P(k),k]);
# end
# r = e*r(sigp);
# x = (H\(L\r));
# sigq = 1:p*q;
# for k = 1:p*q-1;
# sigq([k,Q(k)]) = sigq([Q(k),k]);
# end
# x(sigq) = x;
# X = [];
# for j=1:q
# X = [X,x((j-1)*p+1:j*p)];
# end
# [Q,R] = qr([-X;e*Iq]);
# S(:,[v,w]) = S(:,[v,w])*Q;
# S([v,w],:) = Q'*S([v,w],:);
# U(:,[v,w]) = U(:,[v,w])*Q;
def lu_complpiv(A, overwrite=False):
'''Computes the LU-decomposition of A with complete pivoting, i. e. PAQ=LU, with permutations symbolized by vectors.
:param A:
:return:
'''
if not overwrite or (__debug__ and expensive_asserts):
A_inp = A # debug
A = A.copy()
n = A.shape[0]
P = np.zeros(n - 1, dtype=int)
Q = np.zeros(n - 1, dtype=int)
for k in range(n - 1): # See Golub and Van Loan, p. 118 for comments on this LU-decomposition with complete pivoting.
Ak = A[k:n, :][:, k:n]
rw, cl = np.unravel_index(np.argmax(np.abs(Ak), axis=None), Ak.shape)
rw += k
cl += k
A[[k, rw], :] = A[[rw, k], :].copy()
A[:, [k, cl]] = A[:, [cl, k]].copy()
P[k] = rw
Q[k] = cl
if A[k, k] != 0:
rs = slice(k + 1, n)
A[rs, k] = A[rs, k] / A[k, k]
A[rs, :][:, rs] = A[rs, :][:, rs] - A[rs, k][:, np.newaxis]*A[k, rs]
U = np.tril(A.T).T
L = np.tril(A, -1) + np.eye(n)
if __debug__ and expensive_asserts:
perm_p = np.arange(n) # debug
for k in range(n - 1): # debug
perm_p[[k, P[k]]] = perm_p[[P[k], k]].copy() # debug
perm_q = np.arange(n) # debug
for k in range(n - 1): # debug
perm_q[[k, Q[k]]] = perm_q[[Q[k], k]].copy() # debug
assert np.allclose(A_inp[perm_p, :][:, perm_q], np.dot(L, U)) # debug
return L, U, P, Q
# function [L,U,P,Q] = lu_complpiv(A);
# P = []; Q = []; n = size(A,1);
# for k=1:n-1;
# [a,r] = max(abs(A(k:n,k:n)));
# [dummy,c] = max(abs(a));
# cl = c+k-1;
# rw = r(c)+k-1;
# A([k,rw],:) = A([rw,k],:);
# A(:,[k,cl]) = A(:,[cl,k]);
# P(k) = rw; Q(k) = cl;
# if A(k,k) ~= 0;
# rs = k+1:n;
# A(rs,k) = A(rs,k)/A(k,k);
# A(rs,rs) = A(rs,rs)-A(rs,k)*A(k,rs);
# end
# end
# U = tril(A')'; L = tril(A,-1) + eye(n);
if __name__=='__main__':
import scipy
import scipy.linalg
expensive_asserts = True
for m in range(100):
n = np.random.randint(2, 50)
A = np.random.randn(n, n)
if m%10==0:
z = np.inf
else:
z = float(np.random.randn(1))# + abs(float(np.random.randn(1)))*1j # TODO: rewrite the whole test to cover complex
R, Q = scipy.linalg.schur(A, output='real')
T, Z = scipy.linalg.rsf2csf(R, Q)
ev_orig = np.diag(T)
delta_orig = np.abs(ev_orig - z)
eps = np.finfo(R.dtype).eps
assert np.allclose(np.dot(A, Q), np.dot(Q, R))
r = np.count_nonzero(np.abs(np.diag(R, -1)) > 100*eps)
Q, R, ap = sort_real_schur(Q, R, z, 0, inplace=(m%2==0))
assert np.allclose(np.dot(A, Q), np.dot(Q, R)) # check that still a decomposition of the original matrix
# test that Q and R have the correct structure
assert np.allclose(np.dot(Q, Q.T), np.eye(A.shape[0])) # Q orthonormal
assert np.all(np.tril(R, -2) == 0) # R triangular
assert r == np.count_nonzero(np.abs(np.diag(R, -1)) > 100 * eps) # number of blocks in R is preserved
# check that eigenvalues are sorted
T, Z = scipy.linalg.rsf2csf(R, Q)
ev = np.diag(T)
# check that eigenvalues were preserved
for a, b in zip(np.sort(ev), np.sort(ev_orig)):
assert np.allclose(a, b) or np.allclose(a, np.conj(b)) # TODO: check that both b and conj(b) are present in list
if np.isinf(z):
delta = -np.abs(ev)
else:
delta = np.abs(ev - z)
assert np.all(delta[0:-1] <= delta[1:] + 100*eps), (np.max(delta[0:-1]-delta[1:]), delta)
print('OK')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment