Created
November 19, 2019 19:33
-
-
Save MattEding/fedeef26c79b5d8bfe3836be9627fc80 to your computer and use it in GitHub Desktop.
ADASYN Vectorize vs Loop Benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Namespace(file=None, n_jobs=4, n_neighbors=5, sampling_strategy='auto', trials=3) | |
1 ecoli | |
0.6515465679999999 | |
2 optical_digits | |
1.081901593 | |
3 satimage | |
0.757048266 | |
4 pen_digits | |
0.7772778269999998 | |
5 abalone | |
0.7216238229999998 | |
6 sick_euthyroid | |
0.7105965379999999 | |
7 spectrometer | |
0.6476733709999998 | |
8 car_eval_34 | |
0.672791621 | |
9 isolet | |
6.778681332 | |
10 us_crime | |
0.6839828560000001 | |
11 yeast_ml8 | |
0.7074405609999985 | |
12 scene | |
0.8076008940000001 | |
13 libras_move | |
0.6478258960000005 | |
14 thyroid_sick | |
0.7199621090000008 | |
15 coil_2000 | |
0.9155116790000015 | |
16 arrhythmia | |
0.6713255459999985 | |
17 solar_flare_m0 | |
0.6787288859999983 | |
18 oil | |
0.657154803000001 | |
19 car_eval_4 | |
0.6858342119999996 | |
20 wine_quality | |
0.7266068040000029 | |
21 letter_img | |
1.0060752449999981 | |
22 yeast_me2 | |
0.6741237449999993 | |
23 webpage | |
20.705942846 | |
24 ozone_level | |
0.6946011910000038 | |
25 mammography | |
0.8431376089999958 | |
26 protein_homo | |
8.687401287 | |
27 abalone_19 | |
0.705801689999987 | |
csr(1000, 10)-d0.01-p0.01 | |
0.6483568239999897 | |
arr(1000, 10)-d0.01-p0.01 | |
0.6715544219999998 | |
csr(1000, 10)-d0.1-p0.1 | |
1.136277604 | |
arr(1000, 10)-d0.1-p0.1 | |
0.6596833239999995 | |
csr(10000, 50)-d0.01-p0.01 | |
4.162356662999997 | |
arr(10000, 50)-d0.01-p0.01 | |
1.2406726000000106 | |
csr(10000, 50)-d0.1-p0.1 | |
8.983309900999998 | |
arr(10000, 50)-d0.1-p0.1 | |
1.8265618060000008 | |
csr(100000, 100)-d0.01-p0.01 | |
69.527295614 | |
arr(100000, 100)-d0.01-p0.01 | |
133.846439331 | |
csr(100000, 100)-d0.1-p0.1 | |
230.76780414200005 | |
arr(100000, 100)-d0.1-p0.1 | |
292.81703859699996 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Namespace(file=None, n_jobs=4, n_neighbors=5, sampling_strategy='auto', trials=3) | |
1 ecoli | |
0.6454916709999998 | |
2 optical_digits | |
0.891774474 | |
3 satimage | |
0.6803766329999998 | |
4 pen_digits | |
0.6826901650000003 | |
5 abalone | |
0.6517725180000005 | |
6 sick_euthyroid | |
0.6531233289999996 | |
7 spectrometer | |
0.6459990499999986 | |
8 car_eval_34 | |
0.6503424380000009 | |
9 isolet | |
7.005787720000001 | |
10 us_crime | |
0.6745569519999997 | |
11 yeast_ml8 | |
0.6682129420000003 | |
12 scene | |
0.7355924750000007 | |
13 libras_move | |
0.6388636200000022 | |
14 thyroid_sick | |
0.6670594270000016 | |
15 coil_2000 | |
0.776676092999999 | |
16 arrhythmia | |
0.6448950659999966 | |
17 solar_flare_m0 | |
0.6482231889999994 | |
18 oil | |
0.6433028350000001 | |
19 car_eval_4 | |
0.6546346290000002 | |
20 wine_quality | |
0.6537185050000005 | |
21 letter_img | |
0.7824089070000007 | |
22 yeast_me2 | |
0.6519063629999984 | |
23 webpage | |
20.003640710999996 | |
24 ozone_level | |
0.6614500730000046 | |
25 mammography | |
0.7093737689999955 | |
26 protein_homo | |
6.829618027999999 | |
27 abalone_19 | |
0.6590629220000039 | |
csr(1000, 10)-d0.01-p0.01 | |
0.6387534919999993 | |
arr(1000, 10)-d0.01-p0.01 | |
0.6466937980000012 | |
csr(1000, 10)-d0.1-p0.1 | |
0.6638392989999957 | |
arr(1000, 10)-d0.1-p0.1 | |
0.6362426099999965 | |
csr(10000, 50)-d0.01-p0.01 | |
0.7112311320000089 | |
arr(10000, 50)-d0.01-p0.01 | |
1.104831180000005 | |
csr(10000, 50)-d0.1-p0.1 | |
1.3461644930000034 | |
arr(10000, 50)-d0.1-p0.1 | |
1.6487868080000112 | |
csr(100000, 100)-d0.01-p0.01 | |
11.638341877999991 | |
arr(100000, 100)-d0.01-p0.01 | |
132.322504057 | |
csr(100000, 100)-d0.1-p0.1 | |
153.642012748 | |
arr(100000, 100)-d0.1-p0.1 | |
290.590596469 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import textwrap | |
from timeit import timeit | |
import numpy as np | |
import pandas as pd | |
from scipy import sparse | |
from imblearn.datasets import fetch_datasets | |
import imblearn.datasets._zenodo as zenodo | |
from imblearn.over_sampling import ADASYN | |
def trial_zenodo(name, sampling_strategy, n_neighbors, n_jobs, trials):
    """Time ``ADASYN.fit_resample`` on one named imblearn benchmark dataset.

    The dataset is loaded and the sampler is constructed inside the
    ``timeit`` *setup* code, so only the ``fit_resample`` call itself is
    measured.

    Parameters
    ----------
    name : str
        Key of the dataset in the mapping returned by ``fetch_datasets()``.
    sampling_strategy : str
        Forwarded to ``ADASYN(sampling_strategy=...)``.
    n_neighbors : int
        Forwarded to ``ADASYN(n_neighbors=...)``.
    n_jobs : int or None
        Forwarded to ``ADASYN(n_jobs=...)``.
    trials : int
        Number of times ``timeit`` executes ``fit_resample``.

    Returns
    -------
    float
        Total wall-clock seconds for all ``trials`` executions (the value
        returned by ``timeit.timeit``; it is a sum, not a per-run average).
    """
    # Values are interpolated with ``!r`` so the generated source stays valid
    # Python even when a string contains quotes or backslashes, and so that
    # ``None`` round-trips as the literal ``None``.
    setup = textwrap.dedent(f'''
        from imblearn.datasets import fetch_datasets
        from imblearn.over_sampling import ADASYN

        sampling_strategy = {sampling_strategy!r}
        n_neighbors = {n_neighbors!r}
        n_jobs = {n_jobs!r}

        dataset = fetch_datasets()[{name!r}]
        X, y = dataset.data, dataset.target

        # All arguments are passed by keyword: ADASYN's constructor is
        # keyword-only in current imbalanced-learn releases.
        adasyn = ADASYN(
            sampling_strategy=sampling_strategy,
            n_neighbors=n_neighbors,
            n_jobs=n_jobs,
            random_state=0,
        )
    ''').strip()
    return timeit('adasyn.fit_resample(X, y)', setup=setup, number=trials)
def trial_sparse(fmt, shape, density, p, sampling_strategy, n_neighbors, n_jobs, trials):
    """Time ``ADASYN.fit_resample`` on synthetic random data.

    The feature matrix is generated with ``scipy.sparse.random`` and then
    converted by calling its ``to<fmt>()`` method; the binary target is drawn
    with probability ``p`` for class 0 and ``1 - p`` for class 1. Data
    generation and sampler construction happen in the ``timeit`` *setup*
    code, so only the ``fit_resample`` call is measured.

    Parameters
    ----------
    fmt : str
        Suffix of the conversion method applied to the generated matrix,
        e.g. ``'csr'`` -> ``X.tocsr()`` or ``'array'`` -> ``X.toarray()``.
    shape : tuple of int
        ``(n_samples, n_features)`` of the generated matrix.
    density : float
        ``density`` argument for ``scipy.sparse.random``.
    p : float
        Probability of drawing label 0 for each sample.
    sampling_strategy : str
        Forwarded to ``ADASYN(sampling_strategy=...)``.
    n_neighbors : int
        Forwarded to ``ADASYN(n_neighbors=...)``.
    n_jobs : int or None
        Forwarded to ``ADASYN(n_jobs=...)``.
    trials : int
        Number of times ``timeit`` executes ``fit_resample``.

    Returns
    -------
    float
        Total wall-clock seconds for all ``trials`` executions (the value
        returned by ``timeit.timeit``; it is a sum, not a per-run average).
    """
    # ``!r`` keeps the generated source valid for any string value and renders
    # ``None`` as the literal ``None``. ``fmt`` is spliced in verbatim because
    # it forms part of a method name, not a value.
    setup = textwrap.dedent(f'''
        import numpy as np
        from scipy import sparse
        from imblearn.over_sampling import ADASYN

        shape = {shape!r}
        density = {density!r}
        p = {p!r}

        rng = np.random.RandomState(seed=0)
        y = rng.choice([0, 1], size=shape[0], p=[p, 1-p])
        X = sparse.random(*shape, density=density, random_state=rng)
        X = X.to{fmt}()

        sampling_strategy = {sampling_strategy!r}
        n_neighbors = {n_neighbors!r}
        n_jobs = {n_jobs!r}

        # All arguments are passed by keyword: ADASYN's constructor is
        # keyword-only in current imbalanced-learn releases.
        adasyn = ADASYN(
            sampling_strategy=sampling_strategy,
            n_neighbors=n_neighbors,
            n_jobs=n_jobs,
            random_state=0,
        )
    ''').strip()
    return timeit('adasyn.fit_resample(X, y)', setup=setup, number=trials)
def all_trials(file, sampling_strategy, n_neighbors, n_jobs, trials):
    """Run every benchmark and append labels and timings to ``file``.

    First the datasets with ids 1..27 in ``zenodo.MAP_ID_NAME`` are timed via
    ``trial_zenodo``; then synthetic matrices are timed via ``trial_sparse``
    in both ``csr`` and dense ``array`` form. For each benchmark a label line
    is written before the run and a tab-prefixed timing line after it.

    Parameters
    ----------
    file : str
        Path of the results file; it is opened in append mode for every line.
    sampling_strategy, n_neighbors, n_jobs, trials
        Passed through unchanged to ``trial_zenodo`` / ``trial_sparse``.
    """

    def emit(*fields):
        # Re-open and close the file for each line so that results already
        # obtained are on disk before the next (possibly long) trial starts.
        with open(file, 'a') as out:
            print(*fields, file=out)

    for dataset_id in range(1, 28):
        dataset_name = zenodo.MAP_ID_NAME[dataset_id]
        emit(dataset_id, dataset_name)
        elapsed = trial_zenodo(dataset_name, sampling_strategy, n_neighbors, n_jobs, trials)
        emit('\t', elapsed)

    shapes = [(1_000, 10), (10_000, 50), (100_000, 100)]
    densities = [0.01, 0.1]
    ps = [0.01, 0.1]

    # Densities and class probabilities are paired in lockstep (zip), not
    # combined as a full cross product.
    configurations = [
        (fmt, shape, density, p)
        for shape in shapes
        for density, p in zip(densities, ps)
        for fmt in ['csr', 'array']
    ]
    for fmt, shape, density, p in configurations:
        emit(f'{fmt[:3]}{shape}-d{density}-p{p}')
        elapsed = trial_sparse(fmt, shape, density, p, sampling_strategy, n_neighbors, n_jobs, trials)
        emit('\t', elapsed)
def main():
    """Parse command-line options and run the full benchmark suite.

    Options cover the number of ``timeit`` trials and the ``n_jobs``,
    ``sampling_strategy`` and ``n_neighbors`` arguments given to ADASYN.
    Unless ``--file`` is given, the results file name is derived from those
    options. The parsed arguments are appended to the results file as a
    header line before ``all_trials`` is invoked.
    """
    # The docstring must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` (the program name shown in the
    # usage line), not the description.
    parser = argparse.ArgumentParser(description=zenodo.__doc__)
    parser.add_argument('--trials', '-t', default=3, type=int, help='number of trials for timeit')
    parser.add_argument('--n_jobs', '-j', default=None, type=int, help='n_jobs for ADASYN')
    # The list previously contained 'not majority' twice, which left
    # 'not minority' unselectable. 'none' is kept so existing invocations
    # continue to parse.
    choices = ['minority', 'not minority', 'not majority', 'all', 'auto', 'none']
    parser.add_argument('--sampling_strategy', '-s', default='auto', choices=choices, help='sampling_strategy for ADASYN')
    parser.add_argument('--n_neighbors', '-k', default=5, type=int, help='n_neighbors for ADASYN')
    parser.add_argument('--file', '-f', default=None, help='file to save results to')
    args = parser.parse_args()

    if args.file is None:
        # Encode the run configuration in the default output file name.
        file = f'tr{args.trials}-nj{args.n_jobs}-ss{args.sampling_strategy}-nn{args.n_neighbors}.txt'
    else:
        file = args.file

    # Record the configuration at the top of this run's results.
    with open(file, 'a') as f:
        print(args, file=f)

    all_trials(file, args.sampling_strategy, args.n_neighbors, args.n_jobs, args.trials)
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment