Meekail Zain Micky774

## bench.py
import time
import numpy as np
import pandas as pd
import argparse
from scipy import linalg

# import streamlit as st
# import altair as alt

parser = argparse.ArgumentParser(

## fastica_test.py
from scipy import linalg
import numpy as np
from sklearn.utils._testing import assert_array_almost_equal
import warnings

def assert_sign_redundant(x,y):
    X, Y = x.copy(), y.copy()
    for A in (X,Y):
        for c in range(A.shape[1]):
            if A[0,c] < 0:

## fastica_memory.csv

          
shape,svd,eigh,svd/eigh
"(100, 100)",5.66 MiB,0.80 MiB,7.08
"(100, 1000)",14.29 MiB,26.66 MiB,0.54
"(1000, 100)",8.08 MiB,0.20 MiB,40.40
"(1000, 1000)",48.02 MiB,23.83 MiB,2.02
"(10000, 100)",766.39 MiB,0.18 MiB,4257.72
"(10000, 1000)",890.49 MiB,23.03 MiB,38.67

## benchmark.py
import warnings
from sklearn.exceptions import ConvergenceWarning
# Suppress ConvergenceWarning for the whole script; with the low iteration
# cap set below, the solver may stop before converging.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

from sklearn.linear_model import Lasso
import numpy as np
import time

# Lasso estimator with the solver capped at 200 iterations.
clf = Lasso(max_iter=200)
# NOTE(review): presumably the number of samples in the benchmark data; the
# code that consumes it is not visible in this excerpt -- confirm.
n_samples = 500000

## cython_benchmarks.txt
num_elements x DTYPE
Performance on BRANCH
Performance on MAIN
======================

1e2xFP64
3.11 µs ± 111 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
13 µs ± 113 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

1e3xFP64

## cython_dump_svmlight_bench.py
from time import time
import pandas as pd
import numpy as np
import scipy.sparse as sp
from sklearn.datasets import dump_svmlight_file

def loop(func, params={}, num_trials=1):
    for _ in range(num_trials):
        start_time = time()
        func(**params)

## svmlight_bench_all.csv
shape,main,PR,main/PR,X_sparse,y_sparse
0,"(100, 100)",0.0330301012311662,0.0344602039882114,0.9584998754640446,True,True
1,"(100, 1000)",0.3113810675484793,0.3663262639726911,0.85001021813629,True,True
2,"(1000, 100)",0.3353710855756487,0.3434690747942243,0.9764229451416342,True,True
3,"(1000, 1000)",3.149646248136248,3.4548325879233226,0.9116639281295769,True,True
4,"(10000, 100)",3.1907405853271484,3.161099229540144,1.0093769140525577,True,True
5,"(10000, 1000)",31.38007930346898,34.35677589688982,0.9133592569234557,True,True
0,"(100, 100)",0.0423240661621093,0.033186742237636,1.2753305479352222,True,False
1,"(100, 1000)",0.3182226249149867,0.3539021696363176,0.8991824640182442,True,False
2,"(1000, 100)",0.3290740762438093,0.3116425446101597,1.055934377173871,True,False

## gist:873f75a747cb33c058dfc11286c8ff45
X_shape,X_sparse,n_repeat,duration,branch
"(100, 100)",False,0,0.0122680000000059,pr
"(100, 100)",False,1,0.0107232999999951,pr
"(100, 100)",False,2,0.0104374999999947,pr
"(100, 100)",False,3,0.0101337000000114,pr
"(100, 100)",False,4,0.0102572999999779,pr
"(100, 100)",False,5,0.0097741000000155,pr
"(100, 100)",False,6,0.0098145999999985,pr
"(100, 100)",False,7,0.0097294999999917,pr
"(100, 100)",False,8,0.0098713000000145,pr

## benchmark_dsvmlight.py
# %%
from time import time
import pandas as pd

def loop(func, params=None, num_trials=1):
    """Time repeated calls of ``func`` and yield the duration of each call.

    This is a generator: a trial only runs when the consumer asks for the
    next value, so nothing is timed until the result is iterated.

    Parameters
    ----------
    func : callable
        The function to benchmark.
    params : mapping or None, default=None
        Keyword arguments forwarded as ``func(**params)``. ``None`` means
        "call ``func`` with no keyword arguments".
    num_trials : int, default=1
        Number of timed calls to perform.

    Yields
    ------
    float
        Elapsed time in seconds for one call of ``func``.
    """
    # Local import keeps this block self-contained. perf_counter is a
    # monotonic, high-resolution clock, unlike wall-clock time(), which can
    # jump when the system clock is adjusted.
    from time import perf_counter

    # Avoid a shared mutable default: build a fresh empty mapping per call.
    kwargs = {} if params is None else params
    for _ in range(num_trials):
        started = perf_counter()
        func(**kwargs)
        yield perf_counter() - started

## gist:0d0903d411efe88ad72ecb68305d2cd1
# %%
import numpy as np
import scipy.sparse as sp

def generate_data(n_samples, n_features, X_density=1, y_sparse=False, dtype=np.float64, random_state=None):
    rng = np.random.RandomState(random_state)
    if X_density < 1:
        X = sp.random(n_samples, n_features, format="csr", density=X_density, random_state=rng)
    else:
        X = np.round(rng.rand(n_samples,n_features)*50).astype(dtype)
	import time
	import numpy as np
	import pandas as pd
	import argparse
	from scipy import linalg

	# import streamlit as st
	# import altair as alt

	parser = argparse.ArgumentParser(
	from scipy import linalg
	import numpy as np
	from sklearn.utils._testing import assert_array_almost_equal
	import warnings

	def assert_sign_redundant(x,y):
	X, Y = x.copy(), y.copy()
	for A in (X,Y):
	for c in range(A.shape[1]):
	if A[0,c] < 0:
shape	svd	eigh	svd/eigh
(100, 100)	5.66 MiB	0.80 MiB	7.08
(100, 1000)	14.29 MiB	26.66 MiB	0.54
(1000, 100)	8.08 MiB	0.20 MiB	40.40
(1000, 1000)	48.02 MiB	23.83 MiB	2.02
(10000, 100)	766.39 MiB	0.18 MiB	4257.72
(10000, 1000)	890.49 MiB	23.03 MiB	38.67
	import warnings
	from sklearn.exceptions import ConvergenceWarning
	warnings.filterwarnings("ignore", category=ConvergenceWarning)

	from sklearn.linear_model import Lasso
	import numpy as np
	import time

	clf = Lasso(max_iter=200)
	n_samples = 500000
	num_elements x DTYPE
	Performance on BRANCH
	Performance on MAIN
	======================

	1e2xFP64
	3.11 µs ± 111 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
	13 µs ± 113 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

	1e3xFP64
	from time import time
	import pandas as pd
	import numpy as np
	import scipy.sparse as sp
	from sklearn.datasets import dump_svmlight_file

	def loop(func, params={}, num_trials=1):
	for _ in range(num_trials):
	start_time = time()
	func(**params)
	shape,main,PR,main/PR,X_sparse,y_sparse
	0,"(100, 100)",0.0330301012311662,0.0344602039882114,0.9584998754640446,True,True
	1,"(100, 1000)",0.3113810675484793,0.3663262639726911,0.85001021813629,True,True
	2,"(1000, 100)",0.3353710855756487,0.3434690747942243,0.9764229451416342,True,True
	3,"(1000, 1000)",3.149646248136248,3.4548325879233226,0.9116639281295769,True,True
	4,"(10000, 100)",3.1907405853271484,3.161099229540144,1.0093769140525577,True,True
	5,"(10000, 1000)",31.38007930346898,34.35677589688982,0.9133592569234557,True,True
	0,"(100, 100)",0.0423240661621093,0.033186742237636,1.2753305479352222,True,False
	1,"(100, 1000)",0.3182226249149867,0.3539021696363176,0.8991824640182442,True,False
	2,"(1000, 100)",0.3290740762438093,0.3116425446101597,1.055934377173871,True,False
	X_shape,X_sparse,n_repeat,duration,branch
	"(100, 100)",False,0,0.0122680000000059,pr
	"(100, 100)",False,1,0.0107232999999951,pr
	"(100, 100)",False,2,0.0104374999999947,pr
	"(100, 100)",False,3,0.0101337000000114,pr
	"(100, 100)",False,4,0.0102572999999779,pr
	"(100, 100)",False,5,0.0097741000000155,pr
	"(100, 100)",False,6,0.0098145999999985,pr
	"(100, 100)",False,7,0.0097294999999917,pr
	"(100, 100)",False,8,0.0098713000000145,pr
	# %%
	from time import time
	import pandas as pd

	def loop(func, params={}, num_trials=1):
	for _ in range(num_trials):
	start_time = time()
	func(**params)
	total_time = time()-start_time
	yield total_time