Skip to content

Instantly share code, notes, and snippets.

@ogrisel
Last active July 3, 2023 18:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ogrisel/44bae6f8988abacae358c047d3ecc147 to your computer and use it in GitHub Desktop.
Save ogrisel/44bae6f8988abacae358c047d3ecc147 to your computer and use it in GitHub Desktop.
PCA GPU
# %%
from sklearn import set_config
from sklearn.base import clone
set_config(array_api_dispatch=True)
# %%
try:
import torch
except ModuleNotFoundError:
torch = None
# %%
try:
import cupy
except ModuleNotFoundError:
cupy = None
# %%
try:
import cuml
except ModuleNotFoundError:
cuml = None
# %%
from threadpoolctl import threadpool_info
from pprint import pprint
pprint(threadpool_info())
# %%
import numpy as np
import joblib
m = joblib.Memory(location="./joblib")
@m.cache
def make_data(n_samples=int(5e5), n_features=1000, rank=30, dtype=np.float32, seed=0):
rng = np.random.default_rng(0)
s = np.diag(np.linspace(100, 0, rank)).astype(dtype)
A = rng.standard_normal(size=(n_samples, rank)).astype(dtype)
B = rng.standard_normal(size=(rank, n_features)).astype(dtype)
return A @ s @ B
X_np = make_data()
print(f"Data shape {X_np.shape} and dtype {X_np.dtype} of size {X_np.nbytes / 1e6:.1f} MB")
# %%
from threadpoolctl import threadpool_limits
from sklearn.decomposition import PCA
from time import perf_counter
k = 5
svd_solver = "full"
if svd_solver == "randomized":
pca_params = dict(
n_components=k,
svd_solver="randomized",
power_iteration_normalizer="QR",
random_state=0,
)
else:
pca_params = dict(
n_components=k,
svd_solver="full",
)
pca_np = PCA(**pca_params)
print("Running benchmarks for:")
print(pca_np)
tic = perf_counter()
pca_np.fit(X_np)
toc = perf_counter()
print(
f"Fitting PCA(n_components={k}) with numpy took {toc - tic:.3f}s"
)
# print(pca_np.explained_variance_ratio_)
# for n_threads in [1, 4]:
# with threadpool_limits(limits=n_threads):
# tic = perf_counter()
# pca_np.fit(X_np)
# toc = perf_counter()
# print(
# f"Fitting PCA(n_components={k}) with numpy and {n_threads=} took {toc - tic:.3f}s"
# )
# print(pca_np.explained_variance_ratio_)
# %%
if torch is not None:
X_torch_cpu = torch.tensor(X_np)
pca_torch_cpu = clone(pca_np)
tic = perf_counter()
pca_torch_cpu.fit(X_torch_cpu)
toc = perf_counter()
print(
f"Fitting PCA(n_components={k}) with torch on CPU took {toc - tic:.3f}s"
)
# print(pca_torch_cpu.explained_variance_ratio_)
if torch is not None and torch.cuda.is_available():
X_torch_gpu = X_torch_cpu.to("cuda")
pca_torch_gpu = clone(pca_np)
tic = perf_counter()
pca_torch_gpu.fit(X_torch_gpu)
torch.cuda.synchronize()
toc = perf_counter()
print(
f"Fitting PCA(n_components={k}) with torch on GPU took {toc - tic:.3f}s"
)
# print(pca_torch_gpu.explained_variance_ratio_.cpu())
# %%
if cupy is not None:
X_cupy = cupy.asarray(X_np)
pca_cupy = clone(pca_np)
tic = perf_counter()
pca_cupy.fit(X_cupy)
cupy.cuda.stream.get_current_stream().synchronize()
toc = perf_counter()
print(
f"Fitting PCA(n_components={k}) with cupy on GPU took {toc - tic:.3f}s"
)
# print(pca_cupy.explained_variance_ratio_)
# %%
if svd_solver == "full" and cuml is not None and cupy is not None:
from cuml.decomposition import PCA as PCA_cuml
# cuML's PCA does not support randomized SVD
pca_cuml = PCA_cuml(**pca_params)
tic = perf_counter()
pca_cuml.fit(X_cupy)
cupy.cuda.stream.get_current_stream().synchronize()
toc = perf_counter()
print(
f"Fitting PCA(n_components={k}) with cupy with cuML on GPU took {toc - tic:.3f}s"
)
# print(pca_cuml.explained_variance_ratio_)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment