# Micky774/cython_dump_svmlight_bench.py

## cython_dump_svmlight_bench.py
import os
from time import perf_counter, time

import numpy as np
import pandas as pd
import scipy.sparse as sp
from sklearn.datasets import dump_svmlight_file

def loop(func, params=None, num_trials=1):
    """Call ``func`` repeatedly and yield how long each call took.

    Parameters
    ----------
    func : callable
        Function to benchmark.
    params : dict, optional
        Keyword arguments forwarded to ``func`` on every call. ``None``
        (the default) means the function is called without arguments.
    num_trials : int, default=1
        Number of timed calls to perform.

    Yields
    ------
    float
        Elapsed seconds for a single call.
    """
    if params is None:
        params = {}
    for _ in range(num_trials):
        # perf_counter is monotonic and high-resolution, so short calls are
        # not distorted by system clock adjustments or coarse clock ticks.
        start_time = perf_counter()
        func(**params)
        yield perf_counter() - start_time

def populate_array(func, params=None, num_trials=1):
    """Benchmark ``func`` and collect the per-call timings in an array.

    Parameters
    ----------
    func : callable
        Function to benchmark.
    params : dict, optional
        Keyword arguments forwarded to ``func`` on every call. ``None``
        (the default) is treated as an empty dict.
    num_trials : int, default=1
        Number of timed calls.

    Returns
    -------
    numpy.ndarray
        Array holding the durations yielded by ``loop``, one per trial.
    """
    # Normalise here so ``loop`` always receives a real mapping.
    call_kwargs = {} if params is None else params
    return np.array(list(loop(func, call_kwargs, num_trials)))

def get_stats(data, data_func=None):
    """Summarise an array of samples.

    Parameters
    ----------
    data : numpy.ndarray
        Samples to summarise.
    data_func : callable, optional
        When given (and truthy), it is called with ``data`` and its result
        is returned as the third element.

    Returns
    -------
    tuple
        ``(data.mean(), np.std(data), extra)`` where ``extra`` is the result
        of ``data_func(data)`` or ``None`` when no ``data_func`` was given.
    """
    extra = data_func(data) if data_func else None
    return data.mean(), np.std(data), extra

def generate_data(n_samples, n_features, X_sparse=False, y_sparse=False, dtype=np.float64):
    """Build a reproducible ``(X, y)`` pair for the benchmark.

    A fresh ``RandomState(42)`` is created on every call, so identical
    arguments always produce identical data.

    Parameters
    ----------
    n_samples, n_features : int
        Shape of ``X``; ``y`` has ``n_samples`` entries.
    X_sparse : bool, default=False
        Convert ``X`` to a CSR matrix.
    y_sparse : bool, default=False
        Convert ``y`` to a sparse matrix; if the result has a single row it
        is transposed.
    dtype : numpy dtype, default=np.float64
        Type both arrays are cast to.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` holds rounded uniform draws scaled by 50 and
        ``y`` holds rounded uniform draws shifted by 1.
    """
    random_state = np.random.RandomState(42)

    # X is drawn before y so the random stream is consumed in a fixed order.
    scaled_features = random_state.rand(n_samples, n_features) * 50
    X = np.round(scaled_features).astype(dtype)
    if X_sparse:
        X = sp.csr_matrix(X)

    shifted_targets = random_state.rand(n_samples,) + 1
    y = np.round(shifted_targets).astype(dtype)
    if y_sparse:
        y = sp.csr_matrix(y)
        if y.shape[0] == 1:
            y = y.T
    # NOTE: query_id = np.arange(n_samples) // 2 was left disabled by the author.
    return X, y

# Benchmark driver: time dump_svmlight_file on dense inputs of growing shape
# and record the mean/std of the timings for each shape in a CSV file.
path = "local_artifacts/svmd_"
# Create the output directory up front so neither the dump nor the CSV write
# fails with FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(path), exist_ok=True)
rows = []
for i in range(3):  # n_samples: 100, 1000, 10000
    for j in range(2):  # n_features: 100, 1000
        X, y = generate_data(10 ** (i + 2), 10 ** (j + 2))
        kwargs = {"X": X, "y": y, "f": path + "1"}
        data = populate_array(dump_svmlight_file, kwargs, 7)  # 7 timed runs
        mean, std, _ = get_stats(data)
        row = {"shape": str(X.shape), "branch": "main", "mean": mean, "std": std}
        rows.append(row)
df = pd.DataFrame(rows)
df.to_csv("local_artifacts/svmlight_bench.csv", index_label=False)
	from time import time
	import pandas as pd
	import numpy as np
	import scipy.sparse as sp
	from sklearn.datasets import dump_svmlight_file

	def loop(func, params=None, num_trials=1):
	    """Call ``func`` repeatedly and yield how long each call took.

	    Parameters
	    ----------
	    func : callable
	        Function to benchmark.
	    params : dict, optional
	        Keyword arguments forwarded to ``func`` on every call. ``None``
	        (the default) means the function is called without arguments.
	    num_trials : int, default=1
	        Number of timed calls to perform.

	    Yields
	    ------
	    float
	        Elapsed seconds for a single call.
	    """
	    if params is None:
	        params = {}
	    for _ in range(num_trials):
	        # perf_counter is monotonic and high-resolution, so short calls are
	        # not distorted by system clock adjustments or coarse clock ticks.
	        start_time = perf_counter()
	        func(**params)
	        yield perf_counter() - start_time

	def populate_array(func, params=None, num_trials=1):
	    """Benchmark ``func`` and collect the per-call timings in an array.

	    Parameters
	    ----------
	    func : callable
	        Function to benchmark.
	    params : dict, optional
	        Keyword arguments forwarded to ``func`` on every call. ``None``
	        (the default) is treated as an empty dict.
	    num_trials : int, default=1
	        Number of timed calls.

	    Returns
	    -------
	    numpy.ndarray
	        Array holding the durations yielded by ``loop``, one per trial.
	    """
	    # Normalise here so ``loop`` always receives a real mapping.
	    call_kwargs = {} if params is None else params
	    return np.array(list(loop(func, call_kwargs, num_trials)))

	def get_stats(data, data_func=None):
	    """Summarise an array of samples.

	    Parameters
	    ----------
	    data : numpy.ndarray
	        Samples to summarise.
	    data_func : callable, optional
	        When given (and truthy), it is called with ``data`` and its result
	        is returned as the third element.

	    Returns
	    -------
	    tuple
	        ``(data.mean(), np.std(data), extra)`` where ``extra`` is the result
	        of ``data_func(data)`` or ``None`` when no ``data_func`` was given.
	    """
	    extra_stats = None
	    if data_func:
	        extra_stats = data_func(data)
	    return data.mean(), np.std(data), extra_stats

	def generate_data(n_samples, n_features, X_sparse=False, y_sparse=False, dtype=np.float64):
	    """Build a reproducible ``(X, y)`` pair for the benchmark.

	    A fresh ``RandomState(42)`` is created on every call, so identical
	    arguments always produce identical data.

	    Parameters
	    ----------
	    n_samples, n_features : int
	        Shape of ``X``; ``y`` has ``n_samples`` entries.
	    X_sparse : bool, default=False
	        Convert ``X`` to a CSR matrix.
	    y_sparse : bool, default=False
	        Convert ``y`` to a sparse matrix; if the result has a single row it
	        is transposed.
	    dtype : numpy dtype, default=np.float64
	        Type both arrays are cast to.

	    Returns
	    -------
	    tuple
	        ``(X, y)`` where ``X`` holds rounded uniform draws scaled by 50 and
	        ``y`` holds rounded uniform draws shifted by 1.
	    """
	    rng = np.random.RandomState(42)
	    X = np.round(rng.rand(n_samples, n_features) * 50).astype(dtype)
	    if X_sparse:
	        X = sp.csr_matrix(X)
	    y = np.round(rng.rand(n_samples,) + 1).astype(dtype)
	    if y_sparse:
	        y = sp.csr_matrix(y)
	    if y_sparse and y.shape[0] == 1:
	        y = y.T
	    # NOTE: query_id = np.arange(n_samples) // 2 was left disabled by the author.
	    return X, y

	# Benchmark driver: time dump_svmlight_file on dense inputs of growing shape
	# and record the mean/std of the timings for each shape in a CSV file.
	path = "local_artifacts/svmd_"
	# Create the output directory up front so neither the dump nor the CSV write
	# fails with FileNotFoundError on a fresh checkout.
	os.makedirs(os.path.dirname(path), exist_ok=True)
	rows = []
	for i in range(3):  # n_samples: 100, 1000, 10000
	    for j in range(2):  # n_features: 100, 1000
	        # The sizes are powers of ten; the ``**`` operators were lost in
	        # this copy, leaving ``10(i+2)``, which would try to call an int.
	        X, y = generate_data(10 ** (i + 2), 10 ** (j + 2))
	        kwargs = {"X": X, "y": y, "f": path + "1"}
	        data = populate_array(dump_svmlight_file, kwargs, 7)  # 7 timed runs
	        mean, std, _ = get_stats(data)
	        row = {"shape": str(X.shape), "branch": "main", "mean": mean, "std": std}
	        rows.append(row)
	df = pd.DataFrame(rows)
	df.to_csv("local_artifacts/svmlight_bench.csv", index_label=False)