@michaelwooley
Last active October 22, 2018 22:51
Time- and Memory-Efficient Solution to `chol[kron[A, B]] * e`
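What follows are the gist's benchmark script, the Numba module `chol_kron_ab` (imported below as `cke`), and its pytest suite. The idea, as a rough sketch in terms of the functions defined further down (the small matrices here are illustrative, not from the gist): `chol_kron_e_loop(A, B, e)` returns the same vector as factoring the full Kronecker product, but never materializes the `(mA*mB) x (mA*mB)` matrix.

    import numpy as np
    import chol_kron_ab as cke

    A = np.array([[2.0, 0.5], [0.5, 1.0]])  # small symmetric positive-definite inputs
    B = np.array([[3.0, 1.0], [1.0, 2.0]])
    e = np.random.randn(A.shape[0] * B.shape[0])

    x_loop = cke.chol_kron_e_loop(A, B, e)            # only factors A and B
    x_full = np.linalg.cholesky(np.kron(A, B)) @ e    # forms the full Kronecker product
    assert np.allclose(x_loop, x_full)

First, the benchmark script: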
import numpy as np
import pandas as pd
import scipy.stats as stats
import numpy.testing as npt
import matplotlib.pyplot as plt
import seaborn as sns
from memory_profiler import memory_usage
import timeit
sns.set_style('whitegrid')
sns.set_palette('deep')
import chol_kron_ab as cke


def kron_growth():
    """Growth in the size of the Kronecker product."""
    dfl, n_bds = [], [2, 200]
    for n in range(*n_bds):
        for p in [3, 6, 13]:
            sz = (n * (n * p + 1))**2.
            # 8 bytes per float64 element; sz * 8 * 1e-9 is gigabytes.
            dfl.append({'n': n, 'Lags': p, 'Size': sz, 'Gb': sz * 8 * 1e-9})
    df_sz = pd.DataFrame(dfl)

    fig, axes = plt.subplots(
        1, 2, sharex=True, sharey=False, figsize=(10, 5)
    )
    df_sz.pivot(
        index='n', columns='Lags', values='Size'
    ).plot(
        ax=axes[0], logy=True
    )
    df_sz.pivot(
        index='n', columns='Lags', values='Gb'
    ).plot(
        ax=axes[1], legend=False, logy=True
    )
    axes[0].set_xlabel('Number of variables (n)')
    axes[1].set_xlabel('Number of variables (n)')
    axes[0].set_title('Size of $A \\otimes B$: Elements')
    axes[1].set_title('Size of $A \\otimes B$: Gigabytes')
    fig.savefig(
        '../media/chol_kron_ab_size.svg', bbox_inches=0, transparent=True
    )
    return None
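
# A back-of-the-envelope check of the growth documented above (illustrative,
# not part of the original script): with n = 200 variables and p = 13 lags,
# kron(A, B) would hold (200 * (200 * 13 + 1))**2 ~ 2.7e11 float64 entries,
# i.e. about 2.2e12 bytes (~2.2 TB) at 8 bytes per element -- far too large to
# materialize, which is what motivates the loop-based routine benchmarked next.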


def time_comparison():
    dfl, n_bds, iters = [], [2, 25], 10
    for n in range(*n_bds):
        print('{}'.format(n), end=', ', flush=True)
        for p in [3, 6, 13]:
            k = n * p + 1
            A = stats.wishart.rvs(
                df=n + 2, scale=np.diag(np.random.randn(n)**2.)
            )
            B = stats.wishart.rvs(
                df=k + 2, scale=np.diag(np.random.randn(k)**2.)
            )
            e = np.random.randn(k * n)
            time_np = timeit.timeit(
                'chol_kron_e_numpy(A, B, e)',
                globals={
                    'chol_kron_e_numpy': cke.chol_kron_e_numpy,
                    'A': A,
                    'B': B,
                    'e': e
                },
                number=iters
            ) / iters
            time_loop = timeit.timeit(
                'chol_kron_e_loop(A, B, e)',
                globals={
                    'chol_kron_e_loop': cke.chol_kron_e_loop,
                    'A': A,
                    'B': B,
                    'e': e
                },
                number=iters
            ) / iters
            # time_loop = %timeit -o -q -n 1 -r 1 cke.chol_kron_e_loop(A, B, e)
            dfl.append(
                {
                    'n': n,
                    'Lags': p,
                    'k': k,
                    'runtime_numpy': time_np,
                    'runtime_loop': time_loop
                }
            )
    df = pd.DataFrame(dfl)
    df['runtime_relative'] = df['runtime_numpy'] / df['runtime_loop']

    n_bds = [2, 25]
    fig, axes = plt.subplots(
        1, 2, sharex=True, sharey=False, figsize=(10, 5)
    )
    ax0 = axes[0]
    df.pivot(
        index='n', columns='Lags', values='runtime_relative'
    ).plot(ax=ax0)
    ax0.set_title('Relative Runtime: NumPy / Loop')
    ax0.set_ylabel('NumPy / Loop')
    ax0.set_xlabel('Number of variables (n)')
    ax0.set_xticks(range(*n_bds))
    ax1 = axes[1]
    df.pivot(
        index='n', columns='Lags', values='runtime_loop'
    ).plot(
        ax=ax1, legend=False
    )
    ax1.set_title('Runtime: Loop')
    ax1.set_ylabel('Seconds')
    ax1.set_xlabel('Number of variables (n)')
    ax1.set_xticks(range(*n_bds))
    fig.savefig(
        '../media/chol_kron_time_usage.svg', bbox_inches=0, transparent=True
    )
    return None


def memory_comparison():
    dfl, n_bds = [], [2, 50, 4]
    for n in range(*n_bds):
        print('{}'.format(n), end=', ', flush=True)
        for p in [13]:
            k = n * p + 1
            A = stats.wishart.rvs(
                df=n + 2, scale=np.diag(np.random.randn(n)**2.)
            )
            B = stats.wishart.rvs(
                df=k + 2, scale=np.diag(np.random.randn(k)**2.)
            )
            e = np.random.randn(k * n)
            # mem_np = %memit -o -q -r 1 cke.chol_kron_e_numpy(A, B, e)
            # mem_loop = %memit -o -q -r 1 cke.chol_kron_e_loop(A, B, e)
            mem_np = memory_usage(
                (cke.chol_kron_e_numpy, (A, B, e)), interval=.1
            )
            mem_loop = memory_usage(
                (cke.chol_kron_e_loop, (A, B, e)), interval=.1
            )
            dfl.append(
                {
                    'n': n,
                    'Lags': p,
                    'k': k,
                    'mem_numpy': np.max(mem_np),
                    'mem_loop': np.max(mem_loop)
                }
            )
    df_mem = pd.DataFrame(dfl)
    df_mem['mem_relative'] = df_mem['mem_numpy'] / df_mem['mem_loop']

    n_bds = [2, 50, 10]
    fig_mem, axes = plt.subplots(
        1, 2, sharex=True, sharey=False, figsize=(10, 5)
    )
    ax0 = axes[0]
    df_mem.pivot(
        index='n', columns='Lags', values='mem_relative'
    ).plot(
        marker='o', ax=ax0
    )
    ax0.set_title('Relative Memory Usage')
    ax0.set_ylabel('NumPy / Loop')
    ax0.set_xlabel('Number of variables (n)')
    ax0.set_xticks(range(*n_bds))
    ax1 = axes[1]
    df_mem.pivot(
        index='n', columns='Lags', values='mem_loop'
    ).plot(
        marker='o', ax=ax1, legend=False
    )
    ax1.set_title('Max. Memory Usage: Loop')
    ax1.set_ylabel('Megabytes')
    ax1.set_xlabel('Number of variables (n)')
    ax1.set_xticks(range(*n_bds))
    fig_mem.savefig(
        '../media/chol_kron_memory_usage.svg', bbox_inches=0, transparent=True
    )
    return None


def large_matrices():
    dfl, n_bds, iters = [], [25, 203, 5], 10
    for n in range(*n_bds):
        print('{}'.format(n), end=', ', flush=True)
        for p in [13]:  # [3, 6, 13]
            k = n * p + 1
            A = stats.wishart.rvs(
                df=n + 2, scale=np.diag(np.random.randn(n)**2.)
            )
            B = stats.wishart.rvs(
                df=k + 2, scale=np.diag(np.random.randn(k)**2.)
            )
            e = np.random.randn(k * n)
            # time_loop = %timeit -o -q -r 1 cke.chol_kron_e_loop(A, B, e)
            time_loop = timeit.timeit(
                'chol_kron_e_loop(A, B, e)',
                globals={
                    'chol_kron_e_loop': cke.chol_kron_e_loop,
                    'A': A,
                    'B': B,
                    'e': e
                },
                number=iters
            ) / iters
            mem_loop = memory_usage(
                (cke.chol_kron_e_loop, (A, B, e)), interval=0.1
            )
            dfl.append(
                {
                    'n': n,
                    'Lags': p,
                    'k': k,
                    'runtime': time_loop,
                    'mem_loop': np.max(mem_loop)
                }
            )
    df = pd.DataFrame(dfl)
    df.to_pickle('./df_big_linux.pkl')

    fig, axes = plt.subplots(
        1, 2, sharex=True, sharey=False, figsize=(10, 5)
    )
    rng = range(df['n'].min(), df['n'].max() + 1, 25)
    ax0 = axes[0]
    df.pivot(
        index='n', columns='Lags', values='runtime'
    ).plot(
        marker='o', ax=ax0
    )
    ax0.set_title('Runtime')
    ax0.set_ylabel('Seconds')
    ax0.set_xlabel('Number of variables (n)')
    ax0.set_xticks(rng)
    ax1 = axes[1]
    df.pivot(
        index='n', columns='Lags', values='mem_loop'
    ).plot(
        marker='o', ax=ax1, legend=False
    )
    ax1.set_title('Max. Memory Usage')
    ax1.set_ylabel('Megabytes')
    ax1.set_xlabel('Number of variables (n)')
    ax1.set_xticks(rng)
    # fig.show()
    fig.savefig(
        '../media/chol_kron_big.svg', bbox_inches=0, transparent=True
    )
    return None


def time_chol():
    dfl, n_bds, iters = [], [25, 203, 1], 10
    for n in range(*n_bds):
        for p in [13]:  # [3, 6, 13]
            k = n * p + 1
            A = stats.wishart.rvs(
                df=n + 2, scale=np.diag(np.random.randn(n)**2.)
            )
            # time_loop = %timeit -o -q -r 1 cke.chol(A)
            time_loop = timeit.timeit(
                'chol(A)',
                globals={
                    'chol': cke.chol,
                    'A': A,
                },
                number=iters
            ) / iters
            time_np = timeit.timeit(
                'chol(A)',
                globals={
                    'chol': cke.np_chol,
                    'A': A,
                },
                number=iters
            ) / iters
            print(
                '{:03d} {:05.4e} {:05.4e} {:04.3f} {}'.format(
                    n, time_loop, time_np, time_np / time_loop, time_loop < time_np
                ),
                flush=True
            )


def time_kron():
    dfl, n_bds, iters = [], [25, 35, 1], 10
    for n in range(*n_bds):
        for p in [13]:  # [3, 6, 13]
            k = n * p + 1
            A = stats.wishart.rvs(
                df=n + 2, scale=np.diag(np.random.randn(n)**2.)
            )
            B = stats.wishart.rvs(
                df=k + 2, scale=np.diag(np.random.randn(k)**2.)
            )
            # time_loop = %timeit -o -q -r 1 cke.kron(A, B)
            time_loop = timeit.timeit(
                'kron(A, B)',
                globals={
                    'kron': cke.kron,
                    'A': A,
                    'B': B
                },
                number=iters
            ) / iters
            time_np = timeit.timeit(
                'kron(A, B)',
                globals={
                    'kron': cke.np_kron,
                    'A': A,
                    'B': B
                },
                number=iters
            ) / iters
            print(
                '{:03d} {:05.4e} {:05.4e} {:04.3f} {}'.format(
                    n, time_loop, time_np, time_np / time_loop, time_loop < time_np
                ),
                flush=True
            )


if __name__ == "__main__":
    import time

    print('BENCHMARKS', flush=True)

    print('Size of Matrices from Kronecker Products:', flush=True)
    tic = time.time()
    kron_growth()
    print('\t{:6.2f} s.'.format(time.time() - tic), flush=True)

    print('Time Comparison: ', flush=True)
    tic = time.time()
    time_comparison()
    print('\t{:6.2f} s.'.format(time.time() - tic), flush=True)

    print('Memory Comparison: ', flush=True)
    tic = time.time()
    memory_comparison()
    print('\t{:6.2f} s.'.format(time.time() - tic), flush=True)

    # print('Large Matrix Benchmarks: ', flush=True)
    # tic = time.time()
    # large_matrices()
    # print('\t{:6.2f} s.'.format(time.time() - tic), flush=True)
    # df = pd.read_pickle('df_big.pkl')

# chol_kron_ab.py -- the Numba module imported above as `cke`.
import numba
import numpy as np

FTYPE = numba.float64
ITYPE = numba.int64
FTYPE_ = np.float64
ITYPE_ = np.int64
FVECTOR = numba.float64[:]
FMATRIX = numba.float64[:, :]


@numba.njit(FMATRIX(FMATRIX, FMATRIX), cache=True)
def kron(A: FMATRIX, B: FMATRIX) -> FMATRIX:
    """Kronecker product of two matrices A and B.

    Note: This appears to be faster than the NumPy implementation, which is
    based on reshapes. Assigning each element individually also tends to be
    faster than assigning by blocks a la:

        out[ii * shpB[0]:(ii + 1) * shpB[0],
            jj * shpB[1]:(jj + 1) * shpB[1]] = A[ii, jj] * B

    Args:
        A (FMATRIX): Matrix
        B (FMATRIX): Matrix

    Returns:
        FMATRIX: kron(A, B)
    """
    shpA = A.shape
    shpB = B.shape
    out = np.empty((shpA[0] * shpB[0], shpA[1] * shpB[1]))
    for ii in range(shpA[0]):
        for jj in range(shpA[1]):
            for kk in range(shpB[0]):
                for hh in range(shpB[1]):
                    out[ii * shpB[0] + kk, jj * shpB[1] + hh] = A[ii, jj] * B[kk, hh]
    return out
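
# A quick doctest-style check (illustrative; not part of the original module)
# that the hand-rolled loop matches NumPy's reshape-based implementation:
#
#     >>> A = np.arange(6.).reshape(2, 3)
#     >>> B = np.ones((2, 2))
#     >>> bool(np.allclose(kron(A, B), np.kron(A, B)))
#     True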


@numba.njit(FMATRIX(FMATRIX), cache=True)
def chol(A: FMATRIX) -> FMATRIX:
    """Cholesky decomposition of a real, symmetric, positive-definite matrix A.

    Args:
        A (FMATRIX): Real, symmetric, positive-definite matrix

    Returns:
        FMATRIX: Lower-triangular matrix `L` s.t. `A = L * L'`
    """
    m, _ = A.shape
    L = np.zeros((m, m))
    for ii in range(0, m):
        for kk in range(0, ii + 1):
            L[ii, kk] = A[ii, kk]
            for jj in range(0, kk):
                L[ii, kk] -= (L[ii, jj] * L[kk, jj])
            if ii == kk:
                L[ii, kk] **= 0.5
            else:
                L[ii, kk] /= L[kk, kk]
    return L
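
# Worked example (illustrative; not in the original gist): for
#     A = [[4., 2.],
#          [2., 3.]]
# the row-by-row (Cholesky-Banachiewicz) recursion above gives
# L[0, 0] = sqrt(4) = 2, L[1, 0] = 2 / 2 = 1, and L[1, 1] = sqrt(3 - 1**2) = sqrt(2),
# so that L @ L.T recovers A. The tests further down check `chol` against
# np.linalg.cholesky on random Wishart draws.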


@numba.njit(FMATRIX(FMATRIX), cache=True)
def np_chol(A: FMATRIX) -> FMATRIX:
    return np.linalg.cholesky(A)


# Plain `jit` rather than `njit`: np.kron may not be supported in nopython mode.
@numba.jit(FMATRIX(FMATRIX, FMATRIX), cache=True)
def np_kron(A: FMATRIX, B: FMATRIX) -> FMATRIX:
    return np.kron(A, B)


@numba.njit(FVECTOR(FMATRIX, FMATRIX, FVECTOR), cache=True)
def chol_kron_e_loop(A: FMATRIX, B: FMATRIX, e: FVECTOR) -> FVECTOR:
    """Solution to `cholesky[kron[A, B]] * e`.

    Args:
        A (FMATRIX): Real, symmetric, positive-definite matrix of size [mA, mA]
        B (FMATRIX): Real, symmetric, positive-definite matrix of size [mB, mB]
        e (FVECTOR): Vector of length mA * mB

    Returns:
        FVECTOR: Vector of length mA * mB, the solution to
            `cholesky[kron[A, B]] * e`
    """
    mA = A.shape[0]
    mB = B.shape[0]
    L_a = np.linalg.cholesky(A)
    L_b = np.linalg.cholesky(B)
    out = np.zeros((mA * mB))
    for ii in range(mA):
        for jj in range(ii + 1):
            for hh in range(mB):
                for kk in range(hh + 1):
                    out[mB * ii + hh] += L_a[ii, jj] * L_b[hh, kk] * e[mB * jj + kk]
    return out
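
# Why the loop is valid (explanatory note, not in the original module): if
# L_a = cholesky(A) and L_b = cholesky(B), then
#     kron(L_a, L_b) @ kron(L_a, L_b).T == kron(L_a @ L_a.T, L_b @ L_b.T) == kron(A, B),
# and kron(L_a, L_b) is lower triangular with a positive diagonal, so it is
# exactly the Cholesky factor of kron(A, B). The quadruple loop above applies
# this factor to `e` element by element, skipping the structurally zero entries
# (jj > ii or kk > hh), so only the mA x mA and mB x mB factors are ever formed
# rather than the full (mA*mB) x (mA*mB) matrix.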


@numba.njit(FVECTOR(FMATRIX, FMATRIX, FVECTOR), cache=True)
def chol_kron_e_numpy(A: FMATRIX, B: FMATRIX, e: FVECTOR) -> FVECTOR:
    """Solution to `cholesky[kron[A, B]] * e`.

    Computes the answer with NumPy functions by materializing the full
    Kronecker product. For testing purposes only.

    Args:
        A (FMATRIX): Real, symmetric, positive-definite matrix of size [mA, mA]
        B (FMATRIX): Real, symmetric, positive-definite matrix of size [mB, mB]
        e (FVECTOR): Vector of length mA * mB

    Returns:
        FVECTOR: Vector of length mA * mB, the solution to
            `cholesky[kron[A, B]] * e`
    """
    out = np.linalg.cholesky(kron(A, B))
    return np.dot(out, e)

# Test module (run with pytest).
import pytest
import numpy as np
import numpy.testing as npt
import scipy.stats as stats

from . import chol_kron_ab as cke


def test_chol():
    for _ in range(100):
        n = np.random.choice(range(2, 300))
        A = stats.wishart.rvs(df=n + 2, scale=np.random.randn(n)**2.)
        L_o = cke.chol(A)
        L_f = np.linalg.cholesky(A)
        npt.assert_array_almost_equal(L_o, L_f)


def test_chol_kron_e():
    for _ in range(100):
        n0, n1 = np.random.choice(range(2, 30), size=2)
        A = stats.wishart.rvs(df=n0 + 2, scale=np.random.randn(n0)**2.)
        B = stats.wishart.rvs(df=n1 + 2, scale=np.random.randn(n1)**2.)
        e = np.random.randn(n0 * n1)
        ck_o = cke.chol_kron_e_loop(A, B, e)
        ck_f = np.linalg.cholesky(np.kron(A, B)).dot(e)
        print(ck_o)
        print()
        print(ck_f)
        npt.assert_array_almost_equal(ck_o, ck_f)


def test_chol_kron_e_numpy():
    for _ in range(100):
        n0, n1 = np.random.choice(range(2, 30), size=2)
        A = stats.wishart.rvs(df=n0 + 2, scale=np.random.randn(n0)**2.)
        B = stats.wishart.rvs(df=n1 + 2, scale=np.random.randn(n1)**2.)
        e = np.random.randn(n0 * n1)
        ck_o = cke.chol_kron_e_numpy(A, B, e)
        ck_f = np.linalg.cholesky(np.kron(A, B)).dot(e)
        npt.assert_array_almost_equal(ck_o, ck_f)
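
# Note (not part of the original gist): the relative import above assumes the
# test file and chol_kron_ab.py sit in the same package; with that layout the
# suite can be run from the project root with, e.g., `python -m pytest`.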