Skip to content

Instantly share code, notes, and snippets.

@mathurinm
Created July 13, 2023 11:17
Show Gist options
  • Save mathurinm/aaf9762d03fbf4ffd691fa6b4d1ca68e to your computer and use it in GitHub Desktop.
Save mathurinm/aaf9762d03fbf4ffd691fa6b4d1ca68e to your computer and use it in GitHub Desktop.
scipy.sparse (CSR, COO, CSC) vs NumPy dense: timing X.T @ X across matrix densities
import os
os.environ["OMP_NUM_THREADS"] = "1"
from time import perf_counter
from sklearn.utils import check_random_state
import numpy as np
import matplotlib.pyplot as plt
from scipy import sparse
# Benchmark setup: square d x d matrices, densities swept geometrically
# from 1 (fully dense) down to 1e-4, each product timed n_reps times.
d = 1000
densities = np.geomspace(1, 1e-4, num=10)
rng = check_random_state(0)
n_reps = 10

# times[format, density, repetition] = seconds spent on one `M.T @ M`.
# Format order along axis 0: CSR, COO, CSC, dense ndarray.
times = np.zeros((4, densities.shape[0], n_reps))

for i_density, density in enumerate(densities):
    # random_state=0 is passed at every density; the nonzero values are
    # drawn from the shared `rng` through data_rvs.
    csr = sparse.random(
        d, d,
        density=density,
        random_state=0,
        format='csr',
        data_rvs=rng.randn,
    )
    # Same matrix in every storage layout under test.
    layouts = (csr, csr.tocoo(), csr.tocsc(), csr.toarray())

    for i_format, matrix in enumerate(layouts):
        for rep in range(n_reps):
            # Progress output happens before the clock starts, so it is
            # not part of the measured interval.
            print(rep)
            tic = perf_counter()
            matrix.T @ matrix
            toc = perf_counter()
            times[i_format, i_density, rep] = toc - tic

# Aggregate over the repetition axis (the last one).
means = np.mean(times, axis=-1)
stds = np.std(times, axis=-1)
# Plot the mean runtime of each storage format against density on log-log
# axes, with a band of +/- one standard deviation over the repetitions.
plt.figure(constrained_layout=True)
# means has one row per format; transposing gives one column (one curve)
# per format, and the label list is matched to the curves in that order.
plt.loglog(
    densities, means.T,
    label=["X row sparse format", "X COO", "X column sparse format", "X dense (numpy)"])
for mean_curve, std_curve in zip(means, stds):
    plt.fill_between(
        densities, mean_curve - std_curve, mean_curve + std_curve, alpha=0.5)
plt.legend()
# Raw f-string: the raw prefix keeps the LaTeX backslashes (\in, \mathbb,
# \times) from being read as (invalid) Python escape sequences; doubled
# braces are literal braces, single braces interpolate the matrix size `d`.
plt.title(rf"computing $X^T X$, $X \in \mathbb{{R}}^{{{d} \times {d}}}$")
plt.xlabel("X density")
plt.ylabel("time (s)")
# Non-blocking show: returns immediately instead of waiting for the window
# to be closed.
plt.show(block=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment