Last active
December 31, 2015 16:29
-
-
Save sigman78/8013697 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# fast form of the 8x8 dct/idct | |
# 16bit integer, simd-capable | |
""" | |
http://fgiesen.wordpress.com/2013/11/10/bink-2-2-integer-dct-design-part-2/ | |
The DCT described last time (and implemented with the Matlab/Octave code above) | |
is a scaled DCT, and I didn’t mention the scale factors last time. | |
However, they’re easy enough to solve for. We have a forward transform matrix, M. | |
Since the rows are orthogonal (by construction), the inverse of M (and hence our IDCT) | |
is its transpose, up to scaling. We can write this as matrix equation I = M^T S M | |
where S ought to be a diagonal matrix. Solving for S yields S = (M M^T)^{-1}. | |
This gives the total scaling when multiplying by both M and M^T; | |
what we really want is a normalizing diagonal matrix N | |
such that \tilde{M} = NM is orthogonal, which means | |
I = \tilde{M}^T \tilde{M} = M^T N^T N M = M^T (N^2) M | |
since N is diagonal, so N^2 = S, and again because the two are diagonal | |
this just means that the diagonal entries of N are the square roots | |
of the diagonal entries of S. Then we multiply once by N just before | |
quantization on the encoder side, and another time by N just after | |
dequantization on the decoder side. In practice, this scaling | |
can be folded directly into the quantization/dequantization process, | |
but conceptually they’re distinct steps. | |
This normalization is the right one to use to compare against the “proper” | |
orthogonal DCT-II. That said, with a non-lifting all-integer transform, | |
normalizing to unit gain is a bad idea; we actually want some gain to reduce | |
round-off error. A good compromise is to normalize everything to the gain | |
of the DC coefficient, which is \sqrt{8} for the 1D transform. | |
Using this normalization ensures that DC, which usually contains | |
a significant amount of the overall energy, is not subject to | |
scaling-induced round-off error and thus reproduced exactly. | |
In the 2D transform, each coefficient takes part in first a column DCT | |
and then a row DCT (or vice versa). That means the gain of a separable 2D 8x8 DCT | |
is the square of the gain for the 1D DCT, which in our case means 8. | |
And since the IDCT is just the transpose of the DCT, it has the same gain, | |
so following up the 2D DCT with the corresponding 2D IDCT introduces another 8x gain increase, | |
bringing the total gain for the transform → normalize → inverse transform chain up to 64. | |
""" | |
from numpy import * | |
from numpy import inf as INFI | |
import math | |
M = matrix([ | |
[1, 2, 3, 4, 5, 6, 7, 8], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[1, 1, 1, 1, 1, 1, 1, 1] ], dtype=float) | |
M1 = matrix([ | |
[114, 108, 100, 99, 109, 129, 152, 166], | |
[109, 102, 95, 94, 104, 124, 146, 161], | |
[ 99, 93, 85, 84, 94, 114, 137, 151], | |
[ 86, 80, 72, 71, 82, 102, 124, 138], | |
[ 73, 66, 58, 57, 68, 88, 110, 125], | |
[ 60, 53, 46, 45, 55, 75, 97, 112], | |
[ 50, 43, 36, 35, 45, 65, 88, 102], | |
[ 45, 38, 31, 30, 40, 60, 82, 97]], dtype=int) | |
M1dct = matrix([ | |
[700, 200, 0, 0, 0, 0, 0, 0], | |
[-150, 0, 0, 0, 0, 0, 0, 0], | |
[110, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0], | |
[0, 0, 0, 0, 0, 0, 0, 0] ]) | |
M2 = eye(8) | |
GAIN = 8 | |
def dct(m): | |
i0 = m[0,:] | |
i1 = m[1,:] | |
i2 = m[2,:] | |
i3 = m[3,:] | |
i4 = m[4,:] | |
i5 = m[5,:] | |
i6 = m[6,:] | |
i7 = m[7,:] | |
a0 = i0 + i7 | |
a1 = i1 + i6 | |
a2 = i2 + i5 | |
a3 = i3 + i4 | |
a4 = i0 - i7 | |
a5 = i1 - i6 | |
a6 = i2 - i5 | |
a7 = i3 - i4 | |
# even stage | |
b0 = a0 + a3 | |
b1 = a1 + a2 | |
b2 = a0 - a3 | |
b3 = a1 - a2 | |
# even stage | |
c0 = b0 + b1 | |
c1 = b0 - b1 | |
c2 = b2 + b2/4 + b3/2 | |
c3 = b2/2 - b3 - b3/4 | |
# odd stage | |
b4 = a7/4 + a4 + a4/4 - a4/16 | |
b7 = a4/4 - a7 - a7/4 + a7/16 | |
b5 = a5 + a6 - a6/4 - a6/16 | |
b6 = a6 - a5 + a5/4 + a5/16 | |
# odd stage | |
c4 = b4 + b5 | |
c5 = b4 - b5 | |
c6 = b6 + b7 | |
c7 = b6 - b7 | |
# odd stage | |
d4 = c4 | |
d5 = c5 + c7 | |
d6 = c5 - c7 | |
d7 = c6 | |
return matrix(vstack([c0, d4, c2, d6, c1, d5, c3, d7])) | |
def idct(m): | |
c0 = m[0,:] | |
d4 = m[1,:] | |
c2 = m[2,:] | |
d6 = m[3,:] | |
c1 = m[4,:] | |
d5 = m[5,:] | |
c3 = m[6,:] | |
d7 = m[7,:] | |
# odd stage | |
c4 = d4 | |
c5 = d5 + d6 | |
c7 = d5 - d6 | |
c6 = d7 | |
# odd stage | |
b4 = c4 + c5 | |
b5 = c4 - c5 | |
b6 = c6 + c7 | |
b7 = c6 - c7 | |
# even stage | |
b0 = c0 + c1 | |
b1 = c0 - c1 | |
b2 = c2 + c2/4 + c3/2 | |
b3 = c2/2 - c3 - c3/4 | |
# odd | |
a4 = b7/4 + b4 + b4/4 - b4/16 | |
a7 = b4/4 - b7 - b7/4 + b7/16 | |
a5 = b5 - b6 + b6/4 + b6/16 | |
a6 = b6 + b5 - b5/4 - b5/16 | |
# even stage | |
a0 = b0 + b2 | |
a1 = b1 + b3 | |
a2 = b1 - b3 | |
a3 = b0 - b2 | |
# stage 1 | |
z0 = a0 + a4 | |
z1 = a1 + a5 | |
z2 = a2 + a6 | |
z3 = a3 + a7 | |
z4 = a3 - a7 | |
z5 = a2 - a6 | |
z6 = a1 - a5 | |
z7 = a0 - a4 | |
return matrix(vstack([z0, z1, z2, z3, z4, z5, z6, z7])) | |
rF = matrix(""" | |
1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000; | |
1.18750 1.00000 0.68750 0.25000 -0.25000 -0.68750 -1.00000 -1.18750; | |
1.25000 0.50000 -0.50000 -1.25000 -1.25000 -0.50000 0.50000 1.25000; | |
1.43750 -0.31250 -1.68750 -0.93750 0.93750 1.68750 0.31250 -1.43750; | |
1.00000 -1.00000 -1.00000 1.00000 1.00000 -1.00000 -1.00000 1.00000; | |
0.93750 -1.68750 0.31250 1.43750 -1.43750 -0.31250 1.68750 -0.93750; | |
0.50000 -1.25000 1.25000 -0.50000 -0.50000 1.25000 -1.25000 0.50000; | |
0.25000 -0.68750 1.00000 -1.18750 1.18750 -1.00000 0.68750 -0.25000 | |
""") | |
rS = matrix(""" | |
1.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000; | |
0.00000 1.35809 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000; | |
0.00000 0.00000 1.10345 0.00000 0.00000 0.00000 0.00000 0.00000; | |
0.00000 0.00000 0.00000 0.67905 0.00000 0.00000 0.00000 0.00000; | |
0.00000 0.00000 0.00000 0.00000 1.00000 0.00000 0.00000 0.00000; | |
0.00000 0.00000 0.00000 0.00000 0.00000 0.67905 0.00000 0.00000; | |
0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 1.10345 0.00000; | |
0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 1.35809 | |
""") | |
def verify(I): | |
F = dct(I) | |
#print F.round(5) | |
#print "dif", F - rF | |
#print diag(F * F.H) | |
#print diag(F * F.H) | |
S = diag(GAIN / diag(F * F.H)) | |
#print "S", S.round(4) | |
#print "dif", (S - rS).round(5) | |
inv4 = idct(I) | |
#print inv4 | |
scaled = kron(S, S) * kron(F, F) | |
# 72.047 | |
# print linalg.norm(kron(I, I) * scaled) | |
print linalg.norm(kron(I, I) * scaled, INFI) | |
# 67.905 | |
print linalg.norm(kron(I, inv4) * scaled, INFI) | |
# 64 | |
print linalg.norm(kron(inv4, inv4) * scaled, INFI) | |
SCALE = 1 | |
FI = dct(eye(8)) | |
#S = diag(GAIN / diag(FI * FI.H)) | |
S = linalg.inv(FI * FI.T) | |
N = sqrt(S) | |
############################## | |
IN = M1 | |
IN_DCT = M1dct | |
set_printoptions(suppress=True) | |
print "M:\n", dct(8 * eye(8)) | |
m = dct(IN).T * N | |
m = dct(m).T * N | |
print "DCT:\n", (m).round() | |
print "diff:\n", (IN_DCT.T - m).round(2), "\n", linalg.norm(IN_DCT.T - m, INFI) | |
b = idct(m).T | |
b = idct(b).T | |
r = b / (GAIN) | |
print "IDCT:\n", r.round() | |
print "diff:\n", (r - IN).round(2), "\n", linalg.norm(r - IN, INFI) | |
#verify(eye(8)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment