fabian-paul/sort_real_schur.py

## sort_real_schur.py
# Python version of the following original work:
# Title:	Sorting Real Schur Forms
# Author:	Jan Brandts
# E-Mail:	brandts-AT-science.uva.nl
# http://m2matlabdb.ma.tum.de/download.jsp?MC_ID=3&MP_ID=119
# http://dx.doi.org/10.1002/nla.274
# Institution:	University of Amsterdam
# Description:	In Matlab 6, there exists a command to generate a real Schur form, wheras another transforms a real
# Schur form into a complex one. There do not exist commands to prescribe the order in which the eigenvalues appear on
# the diagonal of the upper (quasi-) triangular factor T.
# For the complex case, a routine is sketched in Golub and Van Loan (1996), that orders the diagonal of T according to
# their distance to a target value. In the reference below, we give a Matlab routine to sort real Schur forms in Matlab.
# It is based on a block-swapping procedure by Bai and Demmel (1993).
#
# Sorting real Schur forms, both partially and completely, has important applications in the computation of real
# invariant subspaces.
# J.H. Brandts
# Matlab code for sorted real Schur forms
# Numerical Linear Algebra with Applications 9(3):249-261 (2002)
# Keywords:	Real Schur Form, sorting, Bai-Demmel algorithm, swapping
# Based on the original Matlab File Version: 1.0

import numpy as np

expensive_asserts = False

def sort_real_schur(Q, R, z, b, inplace=False):
    r'''
    :param Q: np.array((N, N))
        orthogonal real Q such that AQ=QR
    :param R: np.array((N, N))
        quasi-triangular real R such that AQ=QR
    :param z: complex
        target z in the complex plane
        if z==float('inf'), order eigenvalues decreasingly by magnitude
    :param b: float
        determines the length of the ordering with respect to z to be produced
        * if b < 0 then -b blocks will be sorted,
        * if b > 0 then  b or b+1 eigenvalues will be sorted, depending on the sizes of the blocks,
        * if b = 0 then the whole Schur form will be sorted.

    :return: Q, R, ap
        * Q, R : orthogonal real Q and quasi-triangular real R such that AQ=QR with
          the diagonal blocks ordered with respect to the target z. The number of
          ordered blocks/eigenvalues is determined by the parameter b.

        * A vector ap warns for inaccuracy of the solution if an entry of ap exceeds one.
    '''
    eps = np.finfo(R.dtype).eps
    if not np.all(np.abs(np.tril(R, -2)) <= 100*eps):
        raise ValueError('R is not block-triangular')
    if not inplace:
        Q = Q.copy()
        R = R.copy()

    r = np.where(np.abs(np.diag(R, -1)) > 100*eps)[0]  # detect subdiagonal nonzero entries
    s = [i for i in range(R.shape[0] + 1) if not i in r + 1]  # construct from them a vector s with the-top left positions of each block
    p = [None]*(len(s) - 1)  # will hold the eigenvalues

    for k in range(1, len(s) - 1):  # debug
        assert R[s[k], s[k] - 1] <= 100*eps  # debug

    for k in range(len(s) - 1):  # ranging over all blocks
        sk = s[k]
        if s[k + 1] - sk == 2:  # if the block is 2x2
            Q, R = normalize(Q, R, slice(sk, s[k + 1]), inplace=True)  # normalize it
            p[k] = R[sk, sk] + np.lib.scimath.sqrt(R[sk + 1, sk]*R[sk, sk + 1])  # store the eigenvalues
        else:  # (the one with the positive imaginary part is sufficient)
            assert s[k + 1] - sk == 1  # debug
            p[k] = R[s[k], s[k]]  # if the block is 1x1, only store the eigenvalue

    ap = []

    for k in swaplist(p, s, z, b):  # For k ranging over all neighbor-swaps
        assert k + 2 < len(s)  # debug
        v = list(range(s[k], s[k + 1]))  # collect the coordinates of the blocks
        w = list(range(s[k + 1], s[k + 2]))
        assert v[0]!=w[0]  # debug
        if len(v) == 2: assert v[0] < v[1]  # debug
        if len(w) == 2: assert w[0] < w[1]  # debug
        if __debug__ and expensive_asserts:  # debug: check that we are moving the larger eigenvalues to the left (expensive test)
            if v[0] < w[0]:  # debug
                arr = [p[k], p[k + 1]] # debug
                _, which = select(arr, z)  # debug
                assert which==1  # debug
            else:  # debug
                arr = [p[k + 1], p[k]]  # debug
                _, which = select(arr, z)  # debug
                assert which==1  # debug
        vw = v + w
        nrA = np.linalg.norm(R[vw, :][:, vw], ord=np.inf)  # compute norm of the matrix A from (6)
        Q, R = swap(Q, R, v, w, inplace=True)  # swap the blocks
        p[k], p[k + 1] = p[k + 1], p[k]  # debug
        s[k + 1] = s[k] + s[k + 2] - s[k + 1]  # update positions of blocks
        v = list(range(s[k], s[k + 1]))  # update block-coordinates
        w = list(range(s[k + 1], s[k + 2]))
        if len(v)==2:  # if the first block is 2 x 2
            Q, R = normalize(Q, R, v, inplace=True)  # normalize it
        if len(w)==2:  # if the second block is 2 x 2
            Q, R = normalize(Q, R, w, inplace=True)  # normalize it
        ap.append(np.linalg.norm(R[w, :][:, v], ord=np.inf) / (10*eps*nrA))  # measure size of bottom-left block (see p.6, Sect. 2.3)

    R = R - np.tril(R, -2)  # Zero the below-block entries
    for k in range(1, len(s)-1):  # to get a quasi-triangle again
        R[s[k], s[k]-1] = 0

    return Q, R, ap

#r = find(abs(diag(R,-1)) > 100*eps);
#s = 1:size(R,1)+1;
#s(r+1) = [];
#
#for k=1:length(s)-1;
#  sk = s(k);
#  if s(k+1)-sk == 2
#    [Q,R] = normalize(Q,R,sk:s(k+1)-1);
#    p(k)  = R(sk,sk)+sqrt(R(sk+1,sk)*R(sk,sk+1));
#  else
#    p(k)  = R(s(k),s(k));
# end
#end
#
#for k = swaplist(p,s,z,b);
#  v      = s(k):s(k+1)-1;
#  w      = s(k+1):s(k+2)-1;
#  nrA    = norm(R([v,w],[v,w]),inf);
#  [Q,R]  = swap(Q,R,v,w);
#  s(k+1) = s(k)+s(k+2)-s(k+1);
#  v      = s(k):s(k+1)-1;
#  w      = s(k+1):s(k+2)-1;
#  if length(v)==2
#    [Q,R] = normalize(Q,R,v);
#  end
#  if length(w)==2
#    [Q,R] = normalize(Q,R,w);
#  end
#  ap(k)  = norm(R(w,v),inf)/(10*eps*nrA);
# end
#
# R = R - tril(R,-2);
# for j=2:length(s)-1; R(s(j),s(j)-1)=0; end

def normalize(U, S, v, inplace=False):
    r'''Applies a Givens rotation such that the two-by-two diagonal block of S situated at diagonal positions v[0], v[1] is in standardized form.

    :param U:
    :param S:
    :param v:
    :return:
    '''
    Q = rot(S[v, :][:, v])  # Determine the Givens rotation needed for standardization -
    if not inplace:
        S = S.copy()
        U = U.copy()
    S[:, v] = np.dot(S[:, v], Q)  # and apply it left and right to S, and right to U.
    S[v, :] = np.dot(Q.T, S[v, :])  # Only rows and columns with indices in the vector v can be affected by this.
    U[:, v] = np.dot(U[:, v], Q)
    return U, S

#  function [U,S] = normalize(U,S,v);
#  n  = size(S,1);
#  Q  = rot(S(v,v));
#  S(:,v) = S(:,v)*Q;
#  S(v,:) = Q'*S(v,:);
#  U(:,v) = U(:,v)*Q;


def rot(X):
    r'''Computes a Givens rotation needed in `normalize`

    :param X:
    :return:
    '''
    c = 1.0  # Start with the identity transformation, and if needed, change it into ...
    s = 0.0
    if X[0, 0] != X[1, 1]:
        tau = (X[0, 1] + X[1, 0]) / (X[0, 0] - X[1, 1])
        off = (tau**2 + 1)**0.5
        v = [tau - off, tau + off]
        w = np.argmin(np.abs(v))
        c = 1.0/(1.0 + v[w]**2)**0.5  # ... the cosine and sine as given in Section 2.3.1
        s = v[w]*c
    Q = np.array([[c, -s], [s, c]], dtype=X.dtype)
    return Q

#  function Q = rot(X);
#  c = 1; s = 0;
#  if X(1,1)~=X(2,2);
#    tau   = (X(1,2)+X(2,1))/(X(1,1)-X(2,2));
#    off   = sqrt(tau^2+1);
#    v     = [tau - off, tau + off];
#    [d,w] = min(abs(v));
#    c     = 1/sqrt(1+v(w)^2);
#    s     = v(w)*c;
#  end
#  Q = [c -s;s c];

def swaplist(p, s, z, b):
    r'''Produces the list v of swaps of neighboring blocks needed to order the eigenvalues assembled in the vector v
     from closest to z to farthest away from z, taking into account the parameter b.

    :param p: list
        list of eigenvalues (only one copy for each complex-conjugate pair)
    :param s: list
        list of the the-top left positions of each block
    :param z: complex
        target z in the complex plane
    :param b: float
        determines the length of the ordering with respect to z to be produced
        * if b < 0 then -b blocks will be sorted,
        * if b > 0 then  b or b+1 eigenvalues will be sorted, depending on the sizes of the blocks,
        * if b = 0 then the whole Schur form will be sorted.
    :return:
    '''
    p_orig = p  # debug
    n = len(p)
    p = list(p)
    k = 0
    v = []
    srtd = 0  # Number of sorted eigenvalues.
    q = list(np.diff(s))  # Compute block sizes.
    q_orig = list(q) # debug
    fini = False
    while not fini:
        _, j = select(p[k:n], z)  # Determine which block will go to position k
        p_j = p[k + j] # debug
        p[k:n + 1] = [p[j + k]] + p[k:n]  # insert this block at position k,
        assert p[k]==p_j  # debug
        del p[j + k + 1]  # and remove it from where it was taken.
        if expensive_asserts and __debug__: assert np.all(sorted(p)==sorted(p_orig))  # debug
        q_j = q[k + j]  # debug
        q[k:n + 1] = [q[j + k]] + q[k:n]  # Similar for the block-sizes
        assert q[k] == q_j  # debug
        del q[j + k + 1]
        if expensive_asserts and __debug__: assert np.all(sorted(q) == sorted(q_orig)) # debug
        v = v + list(range(k, j + k))[::-1]  # Update the list of swaps for this block
        srtd = srtd + q[k]  # Update the number of sorted eigenvalues
        k += 1
        fini = (k >= n - 1 or k == -b or srtd == b or (srtd == b + 1 and b != 0))
    return v

#  function v = swaplist(p,s,z,b);
#  n = length(p);
#  k = 0; v = [];
#  srtd = 0;
#  q = diff(s);
#  fini = 0;
#  while ~fini
#    k        = k+1;
#    [dum,j]  = select(p(k:n),z);
#    p(k:n+1) = [p(j+k-1) p(k:n)];
#    p(j+k)   = [];
#    q(k:n+1) = [q(j+k-1) q(k:n)];
#    q(j+k)   = [];
#    v        = [v,j+k-2:-1:k];
#    srtd     = srtd + q(k);
#    fini     = (k==n-1)|(k==-b)|(srtd==b)|((srtd==b+1)&(b~=0));
#  end


def select(p, z):
    r'''Determined which element is next in the ordering.

    :param p:
    :param z:
    :return:
    '''
    if np.isinf(z):
        pos = np.argmax(np.abs(p))
        return -abs(p[pos]), pos
    else:
        y = np.real(z) + np.abs(np.imag(z))*1j  # Move target to the upper half plane.
        delta = np.abs(np.array(p) - y)
        pos = np.argmin(delta)  # Find block closest to the target.
        return delta[pos], pos

#  function [val,pos] = select(p,z);
#  y = real(z)+abs(imag(z))*i;
#  [val pos] = min(abs(p-y));


def swap(U, S, v, w, inplace=False):
    r'''Swaps the two diagonal blocks at positions symbolized by the entries of v and w.

    :param U:
    :param S:
    :param v:
    :param w:
    :return:
    '''
    p, q = S[v, :][:, w].shape  # p and q are block sizes
    Ip = np.eye(p)
    Iq = np.eye(q)
    r = np.concatenate([S[v, w[j]] for j in range(q)])  # Vectorize right-hand side for Kronecker product formulation of the Sylvester equations (7).
    K = np.kron(Iq, S[v, :][:, v]) - np.kron(S[w, :][:, w].T, Ip)  # Kronecker product system matrix.
    L, H, P, Q = lu_complpiv(K, overwrite=True)  # LU-decomposition of this matrix.
    e = np.min(np.abs(np.diag(H)))  # Scaling factor to prevent overflow.
    sigp = np.arange(p*q)
    for k in range(p*q - 1):  # Implement permutation P of the LU-decomposition PAQ=LU ...
        sigp[[k, P[k]]] = sigp[[P[k], k]].copy()
    r = e*r[sigp]  # ... scale and permute the right-hand side.
    x = np.linalg.solve(H, np.linalg.solve(L, r))  # and solve the two triangular systems.
    sigq = np.arange(p*q)
    for k in range(p*q - 1):  # Implement permutation Q of the LU-decomposition PAQ=LU ...
        sigq[[k, Q[k]]] = sigq[[Q[k], k]].copy()
    x[sigq] = x.copy()  # ... and permute the solution.
    X = np.vstack([x[j*p:(j + 1)*p] for j in range(q)]).T  # De-vectorize the solution back to a block, or, quit Kronecker formulation.
    Q, R = np.linalg.qr(np.vstack((-X, e*Iq)), mode='complete')  # Householder QR-decomposition of X.
    vw = list(v) + list(w)
    if not inplace:
        S = S.copy()
        U = U.copy()
    S[:, vw] = np.dot(S[:, vw], Q)  # Perform the actual swap by left- and right-multiplication of S by Q,
    S[vw, :] = np.dot(Q.T, S[vw, :])
    U[:, vw] = np.dot(U[:, vw], Q)  # and, right-multiplication of U by Q
    return U, S

#  function [U,S] = swap(U,S,v,w);
#  [p,q] = size(S(v,w)); Ip = eye(p); Iq = eye(q);
#  r = [];
#  for j=1:q
#    r = [r;S(v,w(j))];
#  end
#  K = kron(Iq,S(v,v))-kron(S(w,w)',Ip);
#  [L,H,P,Q] = lu_complpiv(K);
#  e = min(abs(diag(H)));
#  sigp = 1:p*q;
#  for k = 1:p*q-1;
#    sigp([k,P(k)]) = sigp([P(k),k]);
#  end
#  r = e*r(sigp);
#  x = (H\(L\r));
#  sigq = 1:p*q;
#  for k = 1:p*q-1;
#    sigq([k,Q(k)]) = sigq([Q(k),k]);
#  end
#  x(sigq) = x;
#  X = [];
#  for j=1:q
#    X = [X,x((j-1)*p+1:j*p)];
#  end
#  [Q,R]      = qr([-X;e*Iq]);
#  S(:,[v,w]) = S(:,[v,w])*Q;
#  S([v,w],:) = Q'*S([v,w],:);
#  U(:,[v,w]) = U(:,[v,w])*Q;

def lu_complpiv(A, overwrite=False):
    '''Computes the LU-decomposition of A with complete pivoting, i. e. PAQ=LU, with permutations symbolized by vectors.

    :param A:
    :return:
    '''
    if not overwrite or (__debug__ and expensive_asserts):
        A_inp = A  # debug
        A = A.copy()
    n = A.shape[0]
    P = np.zeros(n - 1, dtype=int)
    Q = np.zeros(n - 1, dtype=int)
    for k in range(n - 1):  # See Golub and Van Loan, p. 118 for comments on this LU-decomposition with complete pivoting.
        Ak = A[k:n, :][:, k:n]
        rw, cl = np.unravel_index(np.argmax(np.abs(Ak), axis=None), Ak.shape)
        rw += k
        cl += k
        A[[k, rw], :] = A[[rw, k], :].copy()
        A[:, [k, cl]] = A[:, [cl,  k]].copy()
        P[k] = rw
        Q[k] = cl
        if A[k, k] != 0:
            rs = slice(k + 1, n)
            A[rs, k] = A[rs, k] / A[k, k]
            A[rs, :][:, rs] = A[rs, :][:, rs] - A[rs, k][:, np.newaxis]*A[k, rs]
    U = np.tril(A.T).T
    L = np.tril(A, -1) + np.eye(n)
    if __debug__ and expensive_asserts:
        perm_p = np.arange(n)  # debug
        for k in range(n - 1):  # debug
            perm_p[[k, P[k]]] = perm_p[[P[k], k]].copy()  # debug
        perm_q = np.arange(n)  # debug
        for k in range(n - 1):  # debug
            perm_q[[k, Q[k]]] = perm_q[[Q[k], k]].copy()  # debug
        assert np.allclose(A_inp[perm_p, :][:, perm_q], np.dot(L, U))  # debug
    return L, U, P, Q

#    function [L,U,P,Q] = lu_complpiv(A);
#    P = []; Q = []; n = size(A,1);
#    for k=1:n-1;
#      [a,r] = max(abs(A(k:n,k:n)));
#      [dummy,c] = max(abs(a));
#      cl  = c+k-1;
#      rw  = r(c)+k-1;
#      A([k,rw],:) = A([rw,k],:);
#      A(:,[k,cl]) = A(:,[cl,k]);
#      P(k) = rw; Q(k) = cl;
#      if A(k,k) ~= 0;
#        rs = k+1:n;
#        A(rs,k)  = A(rs,k)/A(k,k);
#        A(rs,rs) = A(rs,rs)-A(rs,k)*A(k,rs);
#      end
#    end
#    U = tril(A')'; L = tril(A,-1) + eye(n);

if __name__=='__main__':
    import scipy
    import scipy.linalg
    expensive_asserts = True
    for m in range(100):
        n = np.random.randint(2, 50)
        A = np.random.randn(n, n)
        if m%10==0:
            z = np.inf
        else:
            z = float(np.random.randn(1))# + abs(float(np.random.randn(1)))*1j # TODO: rewrite the whole test to cover complex
        R, Q = scipy.linalg.schur(A, output='real')
        T, Z = scipy.linalg.rsf2csf(R, Q)
        ev_orig = np.diag(T)
        delta_orig = np.abs(ev_orig - z)
        eps = np.finfo(R.dtype).eps
        assert np.allclose(np.dot(A, Q), np.dot(Q, R))
        r = np.count_nonzero(np.abs(np.diag(R, -1)) > 100*eps)

        Q, R, ap = sort_real_schur(Q, R, z, 0, inplace=(m%2==0))

        assert np.allclose(np.dot(A, Q), np.dot(Q, R))  # check that still a decomposition of the original matrix

        # test that Q and R have the correct structure
        assert np.allclose(np.dot(Q, Q.T), np.eye(A.shape[0]))  # Q orthonormal
        assert np.all(np.tril(R, -2) == 0)  # R triangular
        assert r == np.count_nonzero(np.abs(np.diag(R, -1)) > 100 * eps)  # number of blocks in R is preserved

        # check that eigenvalues are sorted
        T, Z = scipy.linalg.rsf2csf(R, Q)
        ev = np.diag(T)
        # check that eigenvalues were preserved
        for a, b in zip(np.sort(ev), np.sort(ev_orig)):
            assert np.allclose(a, b) or np.allclose(a, np.conj(b))  # TODO: check that both b and conj(b) are present in list
        if np.isinf(z):
            delta = -np.abs(ev)
        else:
            delta = np.abs(ev - z)
        assert np.all(delta[0:-1] <= delta[1:] + 100*eps), (np.max(delta[0:-1]-delta[1:]), delta)
    print('OK')
	# Python version of the following original work:
	# Title: Sorting Real Schur Forms
	# Author: Jan Brandts
	# E-Mail: brandts-AT-science.uva.nl
	# http://m2matlabdb.ma.tum.de/download.jsp?MC_ID=3&MP_ID=119
	# http://dx.doi.org/10.1002/nla.274
	# Institution: University of Amsterdam
	# Description: In Matlab 6, there exists a command to generate a real Schur form, wheras another transforms a real
	# Schur form into a complex one. There do not exist commands to prescribe the order in which the eigenvalues appear on
	# the diagonal of the upper (quasi-) triangular factor T.
	# For the complex case, a routine is sketched in Golub and Van Loan (1996), that orders the diagonal of T according to
	# their distance to a target value. In the reference below, we give a Matlab routine to sort real Schur forms in Matlab.
	# It is based on a block-swapping procedure by Bai and Demmel (1993).
	#
	# Sorting real Schur forms, both partially and completely, has important applications in the computation of real
	# invariant subspaces.
	# J.H. Brandts
	# Matlab code for sorted real Schur forms
	# Numerical Linear Algebra with Applications 9(3):249-261 (2002)
	# Keywords: Real Schur Form, sorting, Bai-Demmel algorithm, swapping
	# Based on the original Matlab File Version: 1.0

	import numpy as np

	expensive_asserts = False

	def sort_real_schur(Q, R, z, b, inplace=False):
	r'''
	:param Q: np.array((N, N))
	orthogonal real Q such that AQ=QR
	:param R: np.array((N, N))
	quasi-triangular real R such that AQ=QR
	:param z: complex
	target z in the complex plane
	if z==float('inf'), order eigenvalues decreasingly by magnitude
	:param b: float
	determines the length of the ordering with respect to z to be produced
	* if b < 0 then -b blocks will be sorted,
	* if b > 0 then b or b+1 eigenvalues will be sorted, depending on the sizes of the blocks,
	* if b = 0 then the whole Schur form will be sorted.

	:return: Q, R, ap
	* Q, R : orthogonal real Q and quasi-triangular real R such that AQ=QR with
	the diagonal blocks ordered with respect to the target z. The number of
	ordered blocks/eigenvalues is determined by the parameter b.

	* A vector ap warns for inaccuracy of the solution if an entry of ap exceeds one.
	'''
	eps = np.finfo(R.dtype).eps
	if not np.all(np.abs(np.tril(R, -2)) <= 100*eps):
	raise ValueError('R is not block-triangular')
	if not inplace:
	Q = Q.copy()
	R = R.copy()

	r = np.where(np.abs(np.diag(R, -1)) > 100*eps)[0] # detect subdiagonal nonzero entries
	s = [i for i in range(R.shape[0] + 1) if not i in r + 1] # construct from them a vector s with the-top left positions of each block
	p = [None]*(len(s) - 1) # will hold the eigenvalues

	for k in range(1, len(s) - 1): # debug
	assert R[s[k], s[k] - 1] <= 100*eps # debug

	for k in range(len(s) - 1): # ranging over all blocks
	sk = s[k]
	if s[k + 1] - sk == 2: # if the block is 2x2
	Q, R = normalize(Q, R, slice(sk, s[k + 1]), inplace=True) # normalize it
	p[k] = R[sk, sk] + np.lib.scimath.sqrt(R[sk + 1, sk]*R[sk, sk + 1]) # store the eigenvalues
	else: # (the one with the positive imaginary part is sufficient)
	assert s[k + 1] - sk == 1 # debug
	p[k] = R[s[k], s[k]] # if the block is 1x1, only store the eigenvalue

	ap = []

	for k in swaplist(p, s, z, b): # For k ranging over all neighbor-swaps
	assert k + 2 < len(s) # debug
	v = list(range(s[k], s[k + 1])) # collect the coordinates of the blocks
	w = list(range(s[k + 1], s[k + 2]))
	assert v[0]!=w[0] # debug
	if len(v) == 2: assert v[0] < v[1] # debug
	if len(w) == 2: assert w[0] < w[1] # debug
	if __debug__ and expensive_asserts: # debug: check that we are moving the larger eigenvalues to the left (expensive test)
	if v[0] < w[0]: # debug
	arr = [p[k], p[k + 1]] # debug
	_, which = select(arr, z) # debug
	assert which==1 # debug
	else: # debug
	arr = [p[k + 1], p[k]] # debug
	_, which = select(arr, z) # debug
	assert which==1 # debug
	vw = v + w
	nrA = np.linalg.norm(R[vw, :][:, vw], ord=np.inf) # compute norm of the matrix A from (6)
	Q, R = swap(Q, R, v, w, inplace=True) # swap the blocks
	p[k], p[k + 1] = p[k + 1], p[k] # debug
	s[k + 1] = s[k] + s[k + 2] - s[k + 1] # update positions of blocks
	v = list(range(s[k], s[k + 1])) # update block-coordinates
	w = list(range(s[k + 1], s[k + 2]))
	if len(v)==2: # if the first block is 2 x 2
	Q, R = normalize(Q, R, v, inplace=True) # normalize it
	if len(w)==2: # if the second block is 2 x 2
	Q, R = normalize(Q, R, w, inplace=True) # normalize it
	ap.append(np.linalg.norm(R[w, :][:, v], ord=np.inf) / (10epsnrA)) # measure size of bottom-left block (see p.6, Sect. 2.3)

	R = R - np.tril(R, -2) # Zero the below-block entries
	for k in range(1, len(s)-1): # to get a quasi-triangle again
	R[s[k], s[k]-1] = 0

	return Q, R, ap

	#r = find(abs(diag(R,-1)) > 100*eps);
	#s = 1:size(R,1)+1;
	#s(r+1) = [];
	#
	#for k=1:length(s)-1;
	# sk = s(k);
	# if s(k+1)-sk == 2
	# [Q,R] = normalize(Q,R,sk:s(k+1)-1);
	# p(k) = R(sk,sk)+sqrt(R(sk+1,sk)*R(sk,sk+1));
	# else
	# p(k) = R(s(k),s(k));
	# end
	#end
	#
	#for k = swaplist(p,s,z,b);
	# v = s(k):s(k+1)-1;
	# w = s(k+1):s(k+2)-1;
	# nrA = norm(R([v,w],[v,w]),inf);
	# [Q,R] = swap(Q,R,v,w);
	# s(k+1) = s(k)+s(k+2)-s(k+1);
	# v = s(k):s(k+1)-1;
	# w = s(k+1):s(k+2)-1;
	# if length(v)==2
	# [Q,R] = normalize(Q,R,v);
	# end
	# if length(w)==2
	# [Q,R] = normalize(Q,R,w);
	# end
	# ap(k) = norm(R(w,v),inf)/(10epsnrA);
	# end
	#
	# R = R - tril(R,-2);
	# for j=2:length(s)-1; R(s(j),s(j)-1)=0; end

	def normalize(U, S, v, inplace=False):
	r'''Applies a Givens rotation such that the two-by-two diagonal block of S situated at diagonal positions v[0], v[1] is in standardized form.

	:param U:
	:param S:
	:param v:
	:return:
	'''
	Q = rot(S[v, :][:, v]) # Determine the Givens rotation needed for standardization -
	if not inplace:
	S = S.copy()
	U = U.copy()
	S[:, v] = np.dot(S[:, v], Q) # and apply it left and right to S, and right to U.
	S[v, :] = np.dot(Q.T, S[v, :]) # Only rows and columns with indices in the vector v can be affected by this.
	U[:, v] = np.dot(U[:, v], Q)
	return U, S

	# function [U,S] = normalize(U,S,v);
	# n = size(S,1);
	# Q = rot(S(v,v));
	# S(:,v) = S(:,v)*Q;
	# S(v,:) = Q'*S(v,:);
	# U(:,v) = U(:,v)*Q;


	def rot(X):
	r'''Computes a Givens rotation needed in `normalize`

	:param X:
	:return:
	'''
	c = 1.0 # Start with the identity transformation, and if needed, change it into ...
	s = 0.0
	if X[0, 0] != X[1, 1]:
	tau = (X[0, 1] + X[1, 0]) / (X[0, 0] - X[1, 1])
	off = (tau2 + 1)0.5
	v = [tau - off, tau + off]
	w = np.argmin(np.abs(v))
	c = 1.0/(1.0 + v[w]2)0.5 # ... the cosine and sine as given in Section 2.3.1
	s = v[w]*c
	Q = np.array([[c, -s], [s, c]], dtype=X.dtype)
	return Q

	# function Q = rot(X);
	# c = 1; s = 0;
	# if X(1,1)~=X(2,2);
	# tau = (X(1,2)+X(2,1))/(X(1,1)-X(2,2));
	# off = sqrt(tau^2+1);
	# v = [tau - off, tau + off];
	# [d,w] = min(abs(v));
	# c = 1/sqrt(1+v(w)^2);
	# s = v(w)*c;
	# end
	# Q = [c -s;s c];

	def swaplist(p, s, z, b):
	r'''Produces the list v of swaps of neighboring blocks needed to order the eigenvalues assembled in the vector v
	from closest to z to farthest away from z, taking into account the parameter b.

	:param p: list
	list of eigenvalues (only one copy for each complex-conjugate pair)
	:param s: list
	list of the the-top left positions of each block
	:param z: complex
	target z in the complex plane
	:param b: float
	determines the length of the ordering with respect to z to be produced
	* if b < 0 then -b blocks will be sorted,
	* if b > 0 then b or b+1 eigenvalues will be sorted, depending on the sizes of the blocks,
	* if b = 0 then the whole Schur form will be sorted.
	:return:
	'''
	p_orig = p # debug
	n = len(p)
	p = list(p)
	k = 0
	v = []
	srtd = 0 # Number of sorted eigenvalues.
	q = list(np.diff(s)) # Compute block sizes.
	q_orig = list(q) # debug
	fini = False
	while not fini:
	_, j = select(p[k:n], z) # Determine which block will go to position k
	p_j = p[k + j] # debug
	p[k:n + 1] = [p[j + k]] + p[k:n] # insert this block at position k,
	assert p[k]==p_j # debug
	del p[j + k + 1] # and remove it from where it was taken.
	if expensive_asserts and __debug__: assert np.all(sorted(p)==sorted(p_orig)) # debug
	q_j = q[k + j] # debug
	q[k:n + 1] = [q[j + k]] + q[k:n] # Similar for the block-sizes
	assert q[k] == q_j # debug
	del q[j + k + 1]
	if expensive_asserts and __debug__: assert np.all(sorted(q) == sorted(q_orig)) # debug
	v = v + list(range(k, j + k))[::-1] # Update the list of swaps for this block
	srtd = srtd + q[k] # Update the number of sorted eigenvalues
	k += 1
	fini = (k >= n - 1 or k == -b or srtd == b or (srtd == b + 1 and b != 0))
	return v

	# function v = swaplist(p,s,z,b);
	# n = length(p);
	# k = 0; v = [];
	# srtd = 0;
	# q = diff(s);
	# fini = 0;
	# while ~fini
	# k = k+1;
	# [dum,j] = select(p(k:n),z);
	# p(k:n+1) = [p(j+k-1) p(k:n)];
	# p(j+k) = [];
	# q(k:n+1) = [q(j+k-1) q(k:n)];
	# q(j+k) = [];
	# v = [v,j+k-2:-1:k];
	# srtd = srtd + q(k);
	# fini = (k==n-1)\|(k==-b)\|(srtd==b)\|((srtd==b+1)&(b~=0));
	# end


	def select(p, z):
	r'''Determined which element is next in the ordering.

	:param p:
	:param z:
	:return:
	'''
	if np.isinf(z):
	pos = np.argmax(np.abs(p))
	return -abs(p[pos]), pos
	else:
	y = np.real(z) + np.abs(np.imag(z))*1j # Move target to the upper half plane.
	delta = np.abs(np.array(p) - y)
	pos = np.argmin(delta) # Find block closest to the target.
	return delta[pos], pos

	# function [val,pos] = select(p,z);
	# y = real(z)+abs(imag(z))*i;
	# [val pos] = min(abs(p-y));


	def swap(U, S, v, w, inplace=False):
	r'''Swaps the two diagonal blocks at positions symbolized by the entries of v and w.

	:param U:
	:param S:
	:param v:
	:param w:
	:return:
	'''
	p, q = S[v, :][:, w].shape # p and q are block sizes
	Ip = np.eye(p)
	Iq = np.eye(q)
	r = np.concatenate([S[v, w[j]] for j in range(q)]) # Vectorize right-hand side for Kronecker product formulation of the Sylvester equations (7).
	K = np.kron(Iq, S[v, :][:, v]) - np.kron(S[w, :][:, w].T, Ip) # Kronecker product system matrix.
	L, H, P, Q = lu_complpiv(K, overwrite=True) # LU-decomposition of this matrix.
	e = np.min(np.abs(np.diag(H))) # Scaling factor to prevent overflow.
	sigp = np.arange(p*q)
	for k in range(p*q - 1): # Implement permutation P of the LU-decomposition PAQ=LU ...
	sigp[[k, P[k]]] = sigp[[P[k], k]].copy()
	r = e*r[sigp] # ... scale and permute the right-hand side.
	x = np.linalg.solve(H, np.linalg.solve(L, r)) # and solve the two triangular systems.
	sigq = np.arange(p*q)
	for k in range(p*q - 1): # Implement permutation Q of the LU-decomposition PAQ=LU ...
	sigq[[k, Q[k]]] = sigq[[Q[k], k]].copy()
	x[sigq] = x.copy() # ... and permute the solution.
	X = np.vstack([x[jp:(j + 1)p] for j in range(q)]).T # De-vectorize the solution back to a block, or, quit Kronecker formulation.
	Q, R = np.linalg.qr(np.vstack((-X, e*Iq)), mode='complete') # Householder QR-decomposition of X.
	vw = list(v) + list(w)
	if not inplace:
	S = S.copy()
	U = U.copy()
	S[:, vw] = np.dot(S[:, vw], Q) # Perform the actual swap by left- and right-multiplication of S by Q,
	S[vw, :] = np.dot(Q.T, S[vw, :])
	U[:, vw] = np.dot(U[:, vw], Q) # and, right-multiplication of U by Q
	return U, S

	# function [U,S] = swap(U,S,v,w);
	# [p,q] = size(S(v,w)); Ip = eye(p); Iq = eye(q);
	# r = [];
	# for j=1:q
	# r = [r;S(v,w(j))];
	# end
	# K = kron(Iq,S(v,v))-kron(S(w,w)',Ip);
	# [L,H,P,Q] = lu_complpiv(K);
	# e = min(abs(diag(H)));
	# sigp = 1:p*q;
	# for k = 1:p*q-1;
	# sigp([k,P(k)]) = sigp([P(k),k]);
	# end
	# r = e*r(sigp);
	# x = (H\(L\r));
	# sigq = 1:p*q;
	# for k = 1:p*q-1;
	# sigq([k,Q(k)]) = sigq([Q(k),k]);
	# end
	# x(sigq) = x;
	# X = [];
	# for j=1:q
	# X = [X,x((j-1)p+1:jp)];
	# end
	# [Q,R] = qr([-X;e*Iq]);
	# S(:,[v,w]) = S(:,[v,w])*Q;
	# S([v,w],:) = Q'*S([v,w],:);
	# U(:,[v,w]) = U(:,[v,w])*Q;

	def lu_complpiv(A, overwrite=False):
	'''Computes the LU-decomposition of A with complete pivoting, i. e. PAQ=LU, with permutations symbolized by vectors.

	:param A:
	:return:
	'''
	if not overwrite or (__debug__ and expensive_asserts):
	A_inp = A # debug
	A = A.copy()
	n = A.shape[0]
	P = np.zeros(n - 1, dtype=int)
	Q = np.zeros(n - 1, dtype=int)
	for k in range(n - 1): # See Golub and Van Loan, p. 118 for comments on this LU-decomposition with complete pivoting.
	Ak = A[k:n, :][:, k:n]
	rw, cl = np.unravel_index(np.argmax(np.abs(Ak), axis=None), Ak.shape)
	rw += k
	cl += k
	A[[k, rw], :] = A[[rw, k], :].copy()
	A[:, [k, cl]] = A[:, [cl, k]].copy()
	P[k] = rw
	Q[k] = cl
	if A[k, k] != 0:
	rs = slice(k + 1, n)
	A[rs, k] = A[rs, k] / A[k, k]
	A[rs, :][:, rs] = A[rs, :][:, rs] - A[rs, k][:, np.newaxis]*A[k, rs]
	U = np.tril(A.T).T
	L = np.tril(A, -1) + np.eye(n)
	if __debug__ and expensive_asserts:
	perm_p = np.arange(n) # debug
	for k in range(n - 1): # debug
	perm_p[[k, P[k]]] = perm_p[[P[k], k]].copy() # debug
	perm_q = np.arange(n) # debug
	for k in range(n - 1): # debug
	perm_q[[k, Q[k]]] = perm_q[[Q[k], k]].copy() # debug
	assert np.allclose(A_inp[perm_p, :][:, perm_q], np.dot(L, U)) # debug
	return L, U, P, Q

	# function [L,U,P,Q] = lu_complpiv(A);
	# P = []; Q = []; n = size(A,1);
	# for k=1:n-1;
	# [a,r] = max(abs(A(k:n,k:n)));
	# [dummy,c] = max(abs(a));
	# cl = c+k-1;
	# rw = r(c)+k-1;
	# A([k,rw],:) = A([rw,k],:);
	# A(:,[k,cl]) = A(:,[cl,k]);
	# P(k) = rw; Q(k) = cl;
	# if A(k,k) ~= 0;
	# rs = k+1:n;
	# A(rs,k) = A(rs,k)/A(k,k);
	# A(rs,rs) = A(rs,rs)-A(rs,k)*A(k,rs);
	# end
	# end
	# U = tril(A')'; L = tril(A,-1) + eye(n);

	if __name__=='__main__':
	import scipy
	import scipy.linalg
	expensive_asserts = True
	for m in range(100):
	n = np.random.randint(2, 50)
	A = np.random.randn(n, n)
	if m%10==0:
	z = np.inf
	else:
	z = float(np.random.randn(1))# + abs(float(np.random.randn(1)))*1j # TODO: rewrite the whole test to cover complex
	R, Q = scipy.linalg.schur(A, output='real')
	T, Z = scipy.linalg.rsf2csf(R, Q)
	ev_orig = np.diag(T)
	delta_orig = np.abs(ev_orig - z)
	eps = np.finfo(R.dtype).eps
	assert np.allclose(np.dot(A, Q), np.dot(Q, R))
	r = np.count_nonzero(np.abs(np.diag(R, -1)) > 100*eps)

	Q, R, ap = sort_real_schur(Q, R, z, 0, inplace=(m%2==0))

	assert np.allclose(np.dot(A, Q), np.dot(Q, R)) # check that still a decomposition of the original matrix

	# test that Q and R have the correct structure
	assert np.allclose(np.dot(Q, Q.T), np.eye(A.shape[0])) # Q orthonormal
	assert np.all(np.tril(R, -2) == 0) # R triangular
	assert r == np.count_nonzero(np.abs(np.diag(R, -1)) > 100 * eps) # number of blocks in R is preserved

	# check that eigenvalues are sorted
	T, Z = scipy.linalg.rsf2csf(R, Q)
	ev = np.diag(T)
	# check that eigenvalues were preserved
	for a, b in zip(np.sort(ev), np.sort(ev_orig)):
	assert np.allclose(a, b) or np.allclose(a, np.conj(b)) # TODO: check that both b and conj(b) are present in list
	if np.isinf(z):
	delta = -np.abs(ev)
	else:
	delta = np.abs(ev - z)
	assert np.all(delta[0:-1] <= delta[1:] + 100*eps), (np.max(delta[0:-1]-delta[1:]), delta)
	print('OK')