Skip to content

Instantly share code, notes, and snippets.

View Micky774's full-sized avatar

Meekail Zain Micky774

  • Quansight
View GitHub Profile
@Micky774
Micky774 / bench.py
Created February 27, 2022 02:07
FastICA Whiten Benchmarks
import time
import numpy as np
import pandas as pd
import argparse
from scipy import linalg
# import streamlit as st
# import altair as alt
parser = argparse.ArgumentParser(
@Micky774
Micky774 / fastica_test.py
Created March 3, 2022 23:04
Test correctness of `eigh` solver in `FastICA`
from scipy import linalg
import numpy as np
from sklearn.utils._testing import assert_array_almost_equal
import warnings
def assert_sign_redundant(x,y):
X, Y = x.copy(), y.copy()
for A in (X,Y):
for c in range(A.shape[1]):
if A[0,c] < 0:
@Micky774
Micky774 / fastica_memory.csv
Last active March 13, 2022 21:45
Memory footprint of SVD vs EIGH for different shapes
shape svd eigh svd/eigh
(100, 100) 5.66 MiB 0.80 MiB 7.08
(100, 1000) 14.29 MiB 26.66 MiB 0.54
(1000, 100) 8.08 MiB 0.20 MiB 40.40
(1000, 1000) 48.02 MiB 23.83 MiB 2.02
(10000, 100) 766.39 MiB 0.18 MiB 4257.72
(10000, 1000) 890.49 MiB 23.03 MiB 38.67
@Micky774
Micky774 / benchmark.py
Created March 18, 2022 22:50
`cda_fast` benchmark
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
from sklearn.linear_model import Lasso
import numpy as np
import time
clf = Lasso(max_iter=200)
n_samples = 500000
@Micky774
Micky774 / cython_benchmarks.txt
Created April 23, 2022 04:05
Benchmarks for `_assert_all_finite` with cython
num_elements x DTYPE
Performance on BRANCH
Performance on MAIN
======================
1e2xFP64
3.11 µs ± 111 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
13 µs ± 113 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1e3xFP64
@Micky774
Micky774 / cython_dump_svmlight_bench.py
Created May 1, 2022 22:35
Benchmark file for the cythonized `dump_svmlight_file`
from time import time
import pandas as pd
import numpy as np
import scipy.sparse as sp
from sklearn.datasets import dump_svmlight_file
def loop(func, params={}, num_trials=1):
for _ in range(num_trials):
start_time = time()
func(**params)
@Micky774
Micky774 / svmlight_bench_all.csv
Created May 2, 2022 00:19
Benchmark results of svmlight cythonization
We can make this file beautiful and searchable if this error is corrected: It looks like row 2 should actually have 6 columns, instead of 7. in line 1.
shape,main,PR,main/PR,X_sparse,y_sparse
0,"(100, 100)",0.0330301012311662,0.0344602039882114,0.9584998754640446,True,True
1,"(100, 1000)",0.3113810675484793,0.3663262639726911,0.85001021813629,True,True
2,"(1000, 100)",0.3353710855756487,0.3434690747942243,0.9764229451416342,True,True
3,"(1000, 1000)",3.149646248136248,3.4548325879233226,0.9116639281295769,True,True
4,"(10000, 100)",3.1907405853271484,3.161099229540144,1.0093769140525577,True,True
5,"(10000, 1000)",31.38007930346898,34.35677589688982,0.9133592569234557,True,True
0,"(100, 100)",0.0423240661621093,0.033186742237636,1.2753305479352222,True,False
1,"(100, 1000)",0.3182226249149867,0.3539021696363176,0.8991824640182442,True,False
2,"(1000, 100)",0.3290740762438093,0.3116425446101597,1.055934377173871,True,False
@Micky774
Micky774 / gist:873f75a747cb33c058dfc11286c8ff45
Last active May 24, 2022 13:05
Benchmark for Cython vs Python implementation of `dump_svmlight_file`
X_shape,X_sparse,n_repeat,duration,branch
"(100, 100)",False,0,0.0122680000000059,pr
"(100, 100)",False,1,0.0107232999999951,pr
"(100, 100)",False,2,0.0104374999999947,pr
"(100, 100)",False,3,0.0101337000000114,pr
"(100, 100)",False,4,0.0102572999999779,pr
"(100, 100)",False,5,0.0097741000000155,pr
"(100, 100)",False,6,0.0098145999999985,pr
"(100, 100)",False,7,0.0097294999999917,pr
"(100, 100)",False,8,0.0098713000000145,pr
@Micky774
Micky774 / benchmark_dsvmlight.py
Created May 24, 2022 13:07
Benchmark file for `dump_svmlight_file`
# %%
from time import time
import pandas as pd
def loop(func, params=None, num_trials=1):
    """Yield the elapsed time, in seconds, of each call to ``func``.

    Parameters
    ----------
    func : callable
        The callable to time. It is invoked as ``func(**params)``.
    params : dict or None, default=None
        Keyword arguments forwarded to ``func``. ``None`` means "no
        keyword arguments".
    num_trials : int, default=1
        Number of times ``func`` is called; one duration is yielded per call.

    Yields
    ------
    float
        Duration of a single call, in fractional seconds.
    """
    # Imported locally so the block does not depend on a module-level import
    # that may not exist. ``perf_counter`` is monotonic and high-resolution,
    # unlike ``time()``, which can jump when the system clock is adjusted.
    from time import perf_counter

    # A ``None`` sentinel avoids sharing one mutable default dict across calls.
    kwargs = {} if params is None else params
    for _ in range(num_trials):
        start_time = perf_counter()
        func(**kwargs)
        yield perf_counter() - start_time
@Micky774
Micky774 / gist:0d0903d411efe88ad72ecb68305d2cd1
Last active May 26, 2022 19:20
Benchmark file for `_assert_all_finite`
# %%
import numpy as np
import scipy.sparse as sp
def generate_data(n_samples, n_features, X_density=1, y_sparse=False, dtype=np.float64, random_state=None):
rng = np.random.RandomState(random_state)
if X_density < 1:
X = sp.random(n_samples, n_features, format="csr", density=X_density, random_state=rng)
else:
X = np.round(rng.rand(n_samples,n_features)*50).astype(dtype)