Created
July 13, 2023 11:17
-
-
Save mathurinm/aaf9762d03fbf4ffd691fa6b4d1ca68e to your computer and use it in GitHub Desktop.
scipy vs numpy for sparse
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark: time to compute X.T @ X for a square random matrix stored as
# scipy CSR, COO and CSC sparse matrices and as a dense numpy array, over a
# range of densities, then plot mean +/- std of the timings on log-log axes.
import os

# Set before numpy/scipy are imported; presumably this pins OpenMP-backed
# routines to a single thread so the timings are comparable -- TODO confirm.
os.environ["OMP_NUM_THREADS"] = "1"

from time import perf_counter

from sklearn.utils import check_random_state
import numpy as np
import matplotlib.pyplot as plt
from scipy import sparse

d = 1000  # X is d x d
densities = np.geomspace(1, 1e-4, num=10)
rng = check_random_state(0)
n_reps = 10

# One label per storage format, in the same order as the matrices timed below.
labels = ["X row sparse format", "X COO", "X column sparse format",
          "X dense (numpy)"]

# times[format, density, repetition] = seconds for one X.T @ X product.
times = np.zeros((len(labels), densities.shape[0], n_reps))

for idx_dens, dens in enumerate(densities):
    X = sparse.random(
        d, d, density=dens, random_state=0, format='csr',
        data_rvs=rng.randn)
    X_coo = X.tocoo()
    X_csc = X.tocsc()
    X_dense = X.toarray()

    for idx_meth, mat in enumerate([X, X_coo, X_csc, X_dense]):
        for idx_rep in range(n_reps):
            print(idx_rep)
            # Only the product itself is timed; the result is discarded.
            t0 = perf_counter()
            mat.T @ mat
            t1 = perf_counter()
            times[idx_meth, idx_dens, idx_rep] = t1 - t0

# Aggregate over repetitions.
means = times.mean(axis=-1)
stds = times.std(axis=-1)

# plt.close('all')
plt.figure(constrained_layout=True)
# means.T has one column per storage format, so each column gets one label.
plt.loglog(densities, means.T, label=labels)
for mean, std in zip(means, stds):
    plt.fill_between(densities, mean - std, mean + std, alpha=0.5)

plt.legend()
# Raw string: the LaTeX backslashes (\in, \mathbb, \times) are not Python
# escape sequences. The matrix size is taken from d instead of hard-coded.
plt.title(r"computing $X^T X$, $X \in \mathbb{R}^{%d \times %d}$" % (d, d))
plt.xlabel("X density")
plt.ylabel("time (s)")
plt.show(block=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment