sigman78/bink_dct.py

## bink_dct.py
# fast form of the 8x8 dct/idct
# 16bit integer, simd-capable

"""
http://fgiesen.wordpress.com/2013/11/10/bink-2-2-integer-dct-design-part-2/

The DCT described last time (and implemented with the Matlab/Octave code above)
is a scaled DCT, and I didn’t mention the scale factors last time.
However, they’re easy enough to solve for. We have a forward transform matrix, M.
Since the rows are orthogonal (by construction), the inverse of M (and hence our IDCT)
is its transpose, up to scaling. We can write this as matrix equation I = M^T S M
where S ought to be a diagonal matrix. Solving for S yields S = (M M^T)^{-1}.
This gives the total scaling when multiplying by both M and M^T;
what we really want is a normalizing diagonal matrix N
such that \tilde{M} = NM is orthogonal, which means

I = \tilde{M}^T \tilde{M} = M^T N^T N M = M^T (N^2) M

since N is diagonal, so N^2 = S, and again because the two are diagonal
this just means that the diagonal entries of N are the square roots
of the diagonal entries of S. Then we multiply once by N just before
quantization on the encoder side, and another time by N just after
dequantization on the decoder side. In practice, this scaling
can be folded directly into the quantization/dequantization process,
but conceptually they’re distinct steps.

This normalization is the right one to use to compare against the “proper”
orthogonal DCT-II. That said, with a non-lifting all-integer transform,
normalizing to unit gain is a bad idea; we actually want some gain to reduce
round-off error. A good compromise is to normalize everything to the gain
of the DC coefficient, which is \sqrt{8} for the 1D transform.
Using this normalization ensures that DC, which usually contains
a significant amount of the overall energy, is not subject to
scaling-induced round-off error and thus reproduced exactly.

In the 2D transform, each coefficient takes part in first a column DCT
and then a row DCT (or vice versa). That means the gain of a separable 2D 8x8 DCT
is the square of the gain for the 1D DCT, which in our case means 8.
And since the IDCT is just the transpose of the DCT, it has the same gain,
so following up the 2D DCT with the corresponding 2D IDCT introduces another 8x gain increase,
bringing the total gain for the transform → normalize → inverse transform chain up to 64.
"""

from numpy import *
from numpy import inf as INFI
import math

M = matrix([
	[1, 2, 3, 4, 5, 6, 7, 8],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[1, 1, 1, 1, 1, 1, 1, 1] ], dtype=float)

M1 = matrix([
	[114, 108, 100, 99, 109, 129, 152, 166],
	[109, 102,  95, 94, 104, 124, 146, 161],
	[ 99,  93,  85, 84,  94, 114, 137, 151],
	[ 86,  80,  72, 71,  82, 102, 124, 138],
	[ 73,  66,  58, 57,  68,  88, 110, 125],
	[ 60,  53,  46, 45,  55,  75,  97, 112],
	[ 50,  43,  36, 35,  45,  65,  88, 102],
	[ 45,  38,  31, 30,  40,  60,  82,  97]], dtype=int)

M1dct = matrix([
	[700, 200, 0, 0, 0, 0, 0, 0],
	[-150, 0, 0, 0, 0, 0, 0, 0],
	[110, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0] ])

M2 = eye(8)

GAIN = 8

def dct(m):
	i0 = m[0,:]
	i1 = m[1,:]
	i2 = m[2,:]
	i3 = m[3,:]
	i4 = m[4,:]
	i5 = m[5,:]
	i6 = m[6,:]
	i7 = m[7,:]

	a0 = i0 + i7
	a1 = i1 + i6
	a2 = i2 + i5
	a3 = i3 + i4
	a4 = i0 - i7
	a5 = i1 - i6
	a6 = i2 - i5
	a7 = i3 - i4

	# even stage
	b0 = a0 + a3
	b1 = a1 + a2
	b2 = a0 - a3
	b3 = a1 - a2

	# even stage
	c0 = b0 + b1
	c1 = b0 - b1
	c2 = b2 + b2/4 + b3/2
	c3 = b2/2 - b3 - b3/4

	# odd stage
	b4 = a7/4 + a4 + a4/4 - a4/16
	b7 = a4/4 - a7 - a7/4 + a7/16
	b5 = a5 + a6 - a6/4 - a6/16
	b6 = a6 - a5 + a5/4 + a5/16

	# odd stage
	c4 = b4 + b5
	c5 = b4 - b5
	c6 = b6 + b7
	c7 = b6 - b7

	# odd stage
	d4 = c4
	d5 = c5 + c7
	d6 = c5 - c7
	d7 = c6

	return matrix(vstack([c0, d4, c2, d6, c1, d5, c3, d7]))

def idct(m):
	c0 = m[0,:]
	d4 = m[1,:]
	c2 = m[2,:]
	d6 = m[3,:]
	c1 = m[4,:]
	d5 = m[5,:]
	c3 = m[6,:]
	d7 = m[7,:]

	# odd stage
	c4 = d4
	c5 = d5 + d6
	c7 = d5 - d6
	c6 = d7

	# odd stage
	b4 = c4 + c5
	b5 = c4 - c5
	b6 = c6 + c7
	b7 = c6 - c7

	# even stage
	b0 = c0 + c1
	b1 = c0 - c1
	b2 = c2 + c2/4 + c3/2
	b3 = c2/2 - c3 - c3/4

	# odd
	a4 = b7/4 + b4 + b4/4 - b4/16
	a7 = b4/4 - b7 - b7/4 + b7/16
	a5 = b5 - b6 + b6/4 + b6/16
	a6 = b6 + b5 - b5/4 - b5/16

	# even stage
	a0 = b0 + b2
	a1 = b1 + b3
	a2 = b1 - b3
	a3 = b0 - b2

	# stage 1
	z0 = a0 + a4
	z1 = a1 + a5
	z2 = a2 + a6
	z3 = a3 + a7
	z4 = a3 - a7
	z5 = a2 - a6
	z6 = a1 - a5
	z7 = a0 - a4

	return matrix(vstack([z0, z1, z2, z3, z4, z5, z6, z7]))

rF = matrix("""
  1.00000   1.00000   1.00000   1.00000   1.00000   1.00000   1.00000   1.00000;
   1.18750   1.00000   0.68750   0.25000  -0.25000  -0.68750  -1.00000  -1.18750;
   1.25000   0.50000  -0.50000  -1.25000  -1.25000  -0.50000   0.50000   1.25000;
   1.43750  -0.31250  -1.68750  -0.93750   0.93750   1.68750   0.31250  -1.43750;
   1.00000  -1.00000  -1.00000   1.00000   1.00000  -1.00000  -1.00000   1.00000;
   0.93750  -1.68750   0.31250   1.43750  -1.43750  -0.31250   1.68750  -0.93750;
   0.50000  -1.25000   1.25000  -0.50000  -0.50000   1.25000  -1.25000   0.50000;
   0.25000  -0.68750   1.00000  -1.18750   1.18750  -1.00000   0.68750  -0.25000
""")

rS = matrix("""
1.00000  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000;
  0.00000  1.35809  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000;
  0.00000  0.00000  1.10345  0.00000  0.00000  0.00000  0.00000  0.00000;
  0.00000  0.00000  0.00000  0.67905  0.00000  0.00000  0.00000  0.00000;
  0.00000  0.00000  0.00000  0.00000  1.00000  0.00000  0.00000  0.00000;
  0.00000  0.00000  0.00000  0.00000  0.00000  0.67905  0.00000  0.00000;
  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  1.10345  0.00000;
  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  1.35809
""")

def verify(I):
	F = dct(I)
	#print F.round(5)
	#print "dif", F - rF
	#print diag(F * F.H)
	#print diag(F * F.H)
	S = diag(GAIN / diag(F * F.H))
	#print "S", S.round(4)
	#print "dif", (S - rS).round(5)
	inv4 = idct(I)
	#print inv4
	scaled = kron(S, S) * kron(F, F)

	# 72.047
	# print linalg.norm(kron(I, I) * scaled)
	print linalg.norm(kron(I, I) * scaled, INFI)

	# 67.905
	print linalg.norm(kron(I, inv4) * scaled, INFI)
	# 64
	print linalg.norm(kron(inv4, inv4) * scaled, INFI)

SCALE = 1

FI = dct(eye(8))
#S = diag(GAIN / diag(FI * FI.H))
S = linalg.inv(FI * FI.T)
N = sqrt(S)

##############################
IN = M1
IN_DCT = M1dct

set_printoptions(suppress=True)

print "M:\n", dct(8 * eye(8))

m = dct(IN).T * N
m = dct(m).T * N
print "DCT:\n", (m).round()
print "diff:\n", (IN_DCT.T - m).round(2), "\n", linalg.norm(IN_DCT.T - m, INFI)

b = idct(m).T
b = idct(b).T
r = b / (GAIN)
print "IDCT:\n", r.round()
print "diff:\n", (r - IN).round(2), "\n", linalg.norm(r - IN, INFI)

#verify(eye(8))
	# fast form of the 8x8 dct/idct
	# 16bit integer, simd-capable

	"""
	http://fgiesen.wordpress.com/2013/11/10/bink-2-2-integer-dct-design-part-2/

	The DCT described last time (and implemented with the Matlab/Octave code above)
	is a scaled DCT, and I didn’t mention the scale factors last time.
	However, they’re easy enough to solve for. We have a forward transform matrix, M.
	Since the rows are orthogonal (by construction), the inverse of M (and hence our IDCT)
	is its transpose, up to scaling. We can write this as matrix equation I = M^T S M
	where S ought to be a diagonal matrix. Solving for S yields S = (M M^T)^{-1}.
	This gives the total scaling when multiplying by both M and M^T;
	what we really want is a normalizing diagonal matrix N
	such that \tilde{M} = NM is orthogonal, which means

	I = \tilde{M}^T \tilde{M} = M^T N^T N M = M^T (N^2) M

	since N is diagonal, so N^2 = S, and again because the two are diagonal
	this just means that the diagonal entries of N are the square roots
	of the diagonal entries of S. Then we multiply once by N just before
	quantization on the encoder side, and another time by N just after
	dequantization on the decoder side. In practice, this scaling
	can be folded directly into the quantization/dequantization process,
	but conceptually they’re distinct steps.

	This normalization is the right one to use to compare against the “proper”
	orthogonal DCT-II. That said, with a non-lifting all-integer transform,
	normalizing to unit gain is a bad idea; we actually want some gain to reduce
	round-off error. A good compromise is to normalize everything to the gain
	of the DC coefficient, which is \sqrt{8} for the 1D transform.
	Using this normalization ensures that DC, which usually contains
	a significant amount of the overall energy, is not subject to
	scaling-induced round-off error and thus reproduced exactly.

	In the 2D transform, each coefficient takes part in first a column DCT
	and then a row DCT (or vice versa). That means the gain of a separable 2D 8x8 DCT
	is the square of the gain for the 1D DCT, which in our case means 8.
	And since the IDCT is just the transpose of the DCT, it has the same gain,
	so following up the 2D DCT with the corresponding 2D IDCT introduces another 8x gain increase,
	bringing the total gain for the transform → normalize → inverse transform chain up to 64.
	"""

	from numpy import *
	from numpy import inf as INFI
	import math

	M = matrix([
	[1, 2, 3, 4, 5, 6, 7, 8],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[1, 1, 1, 1, 1, 1, 1, 1] ], dtype=float)

	M1 = matrix([
	[114, 108, 100, 99, 109, 129, 152, 166],
	[109, 102, 95, 94, 104, 124, 146, 161],
	[ 99, 93, 85, 84, 94, 114, 137, 151],
	[ 86, 80, 72, 71, 82, 102, 124, 138],
	[ 73, 66, 58, 57, 68, 88, 110, 125],
	[ 60, 53, 46, 45, 55, 75, 97, 112],
	[ 50, 43, 36, 35, 45, 65, 88, 102],
	[ 45, 38, 31, 30, 40, 60, 82, 97]], dtype=int)

	M1dct = matrix([
	[700, 200, 0, 0, 0, 0, 0, 0],
	[-150, 0, 0, 0, 0, 0, 0, 0],
	[110, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0],
	[0, 0, 0, 0, 0, 0, 0, 0] ])

	M2 = eye(8)

	GAIN = 8

	def dct(m):
	i0 = m[0,:]
	i1 = m[1,:]
	i2 = m[2,:]
	i3 = m[3,:]
	i4 = m[4,:]
	i5 = m[5,:]
	i6 = m[6,:]
	i7 = m[7,:]

	a0 = i0 + i7
	a1 = i1 + i6
	a2 = i2 + i5
	a3 = i3 + i4
	a4 = i0 - i7
	a5 = i1 - i6
	a6 = i2 - i5
	a7 = i3 - i4

	# even stage
	b0 = a0 + a3
	b1 = a1 + a2
	b2 = a0 - a3
	b3 = a1 - a2

	# even stage
	c0 = b0 + b1
	c1 = b0 - b1
	c2 = b2 + b2/4 + b3/2
	c3 = b2/2 - b3 - b3/4

	# odd stage
	b4 = a7/4 + a4 + a4/4 - a4/16
	b7 = a4/4 - a7 - a7/4 + a7/16
	b5 = a5 + a6 - a6/4 - a6/16
	b6 = a6 - a5 + a5/4 + a5/16

	# odd stage
	c4 = b4 + b5
	c5 = b4 - b5
	c6 = b6 + b7
	c7 = b6 - b7

	# odd stage
	d4 = c4
	d5 = c5 + c7
	d6 = c5 - c7
	d7 = c6

	return matrix(vstack([c0, d4, c2, d6, c1, d5, c3, d7]))

	def idct(m):
	c0 = m[0,:]
	d4 = m[1,:]
	c2 = m[2,:]
	d6 = m[3,:]
	c1 = m[4,:]
	d5 = m[5,:]
	c3 = m[6,:]
	d7 = m[7,:]

	# odd stage
	c4 = d4
	c5 = d5 + d6
	c7 = d5 - d6
	c6 = d7

	# odd stage
	b4 = c4 + c5
	b5 = c4 - c5
	b6 = c6 + c7
	b7 = c6 - c7

	# even stage
	b0 = c0 + c1
	b1 = c0 - c1
	b2 = c2 + c2/4 + c3/2
	b3 = c2/2 - c3 - c3/4

	# odd
	a4 = b7/4 + b4 + b4/4 - b4/16
	a7 = b4/4 - b7 - b7/4 + b7/16
	a5 = b5 - b6 + b6/4 + b6/16
	a6 = b6 + b5 - b5/4 - b5/16

	# even stage
	a0 = b0 + b2
	a1 = b1 + b3
	a2 = b1 - b3
	a3 = b0 - b2

	# stage 1
	z0 = a0 + a4
	z1 = a1 + a5
	z2 = a2 + a6
	z3 = a3 + a7
	z4 = a3 - a7
	z5 = a2 - a6
	z6 = a1 - a5
	z7 = a0 - a4

	return matrix(vstack([z0, z1, z2, z3, z4, z5, z6, z7]))

	rF = matrix("""
	1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000;
	1.18750 1.00000 0.68750 0.25000 -0.25000 -0.68750 -1.00000 -1.18750;
	1.25000 0.50000 -0.50000 -1.25000 -1.25000 -0.50000 0.50000 1.25000;
	1.43750 -0.31250 -1.68750 -0.93750 0.93750 1.68750 0.31250 -1.43750;
	1.00000 -1.00000 -1.00000 1.00000 1.00000 -1.00000 -1.00000 1.00000;
	0.93750 -1.68750 0.31250 1.43750 -1.43750 -0.31250 1.68750 -0.93750;
	0.50000 -1.25000 1.25000 -0.50000 -0.50000 1.25000 -1.25000 0.50000;
	0.25000 -0.68750 1.00000 -1.18750 1.18750 -1.00000 0.68750 -0.25000
	""")

	rS = matrix("""
	1.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000;
	0.00000 1.35809 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000;
	0.00000 0.00000 1.10345 0.00000 0.00000 0.00000 0.00000 0.00000;
	0.00000 0.00000 0.00000 0.67905 0.00000 0.00000 0.00000 0.00000;
	0.00000 0.00000 0.00000 0.00000 1.00000 0.00000 0.00000 0.00000;
	0.00000 0.00000 0.00000 0.00000 0.00000 0.67905 0.00000 0.00000;
	0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 1.10345 0.00000;
	0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 1.35809
	""")

	def verify(I):
	F = dct(I)
	#print F.round(5)
	#print "dif", F - rF
	#print diag(F * F.H)
	#print diag(F * F.H)
	S = diag(GAIN / diag(F * F.H))
	#print "S", S.round(4)
	#print "dif", (S - rS).round(5)
	inv4 = idct(I)
	#print inv4
	scaled = kron(S, S) * kron(F, F)

	# 72.047
	# print linalg.norm(kron(I, I) * scaled)
	print linalg.norm(kron(I, I) * scaled, INFI)

	# 67.905
	print linalg.norm(kron(I, inv4) * scaled, INFI)
	# 64
	print linalg.norm(kron(inv4, inv4) * scaled, INFI)

	SCALE = 1

	FI = dct(eye(8))
	#S = diag(GAIN / diag(FI * FI.H))
	S = linalg.inv(FI * FI.T)
	N = sqrt(S)

	##############################
	IN = M1
	IN_DCT = M1dct

	set_printoptions(suppress=True)

	print "M:\n", dct(8 * eye(8))

	m = dct(IN).T * N
	m = dct(m).T * N
	print "DCT:\n", (m).round()
	print "diff:\n", (IN_DCT.T - m).round(2), "\n", linalg.norm(IN_DCT.T - m, INFI)

	b = idct(m).T
	b = idct(b).T
	r = b / (GAIN)
	print "IDCT:\n", r.round()
	print "diff:\n", (r - IN).round(2), "\n", linalg.norm(r - IN, INFI)

	#verify(eye(8))