Created
September 5, 2019 00:15
-
-
Save MattEding/97c3f36f508ed26e9b2e7dd22db17887 to your computer and use it in GitHub Desktop.
Benchmark comparing the original implementation with my vectorized implementation of the SMOTE algorithm.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import perf_counter, time

import numpy as np
from imblearn.over_sampling import SMOTE
from scipy import sparse
def benchmark(sampler, X, y):
    """Time a single ``sampler.fit_resample(X, y)`` call and print a summary.

    Parameters
    ----------
    sampler : object
        Any object exposing a ``fit_resample(X, y)`` method; its class name
        is used as the label of the printed line.
    X : array or sparse matrix
        Two-dimensional feature matrix. When it has ``nnz`` and ``format``
        attributes they are used to report density and storage format;
        otherwise it is reported with density ``1`` and format ``'arr'``.
    y : array-like
        Target labels; the unique values and their counts are reported.

    Returns
    -------
    float
        Elapsed time of the ``fit_resample`` call, in seconds.
    """
    imb = np.unique(y, return_counts=True)
    m, n = X.shape
    try:
        d = X.nnz / m / n
        f = X.format
    except AttributeError:
        # No ``nnz``/``format`` attribute: report the input as a dense array.
        d = 1
        f = 'arr'
    # perf_counter is monotonic and high-resolution, unlike wall-clock time(),
    # which can jump when the system clock is adjusted.
    t0 = perf_counter()
    sampler.fit_resample(X, y)
    t = perf_counter() - t0
    print(f"{type(sampler).__name__}: X shape: {(m, n)}, y imbalance: {imb}, density: {d}, format: {f}, time: {t}")
    return t
# Benchmark driver: run SMOTE on progressively larger problems and on every
# storage format (COO, CSR, CSC, dense) of the same feature matrix.
sampler = SMOTE(random_state=0)
rng = np.random.RandomState(seed=0)
# Dedicated seeded generator for the feature matrices, so that X (and not only
# y) is identical between runs and timings of different implementations are
# measured on the same inputs. Kept separate from ``rng`` so the sequence of
# labels drawn for y is unaffected.
x_rng = np.random.RandomState(seed=1)

ms = [1_000, 10_000, 100_000]
ns = [10, 50, 100]
ds = [0.01, 0.1]
ps = [0.01, 0.1]

# zip pairs the i-th row count with the i-th column count, and the i-th
# density with the i-th minority-class probability.
for m, n in zip(ms, ns):
    for d, p in zip(ds, ps):
        y = rng.choice([0, 1], size=m, p=[p, 1-p])
        X = sparse.random(m, n, density=d, format='coo', random_state=x_rng)
        benchmark(sampler, X, y)
        # Convert in sequence (COO -> CSR -> CSC -> dense), benchmarking each.
        for method in ('tocsr', 'tocsc', 'toarray'):
            X = getattr(X, method)()
            benchmark(sampler, X, y)
''' | |
ORIGINAL | |
-------- | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 7, 993])), density: 0.01, format: coo, time: 0.07027316093444824 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 7, 993])), density: 0.01, format: csr, time: 0.06946420669555664 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 7, 993])), density: 0.01, format: csc, time: 0.06998085975646973 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 7, 993])), density: 1, format: arr, time: 0.005276918411254883 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 95, 905])), density: 0.1, format: coo, time: 0.2944509983062744 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 95, 905])), density: 0.1, format: csr, time: 0.3965580463409424 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 95, 905])), density: 0.1, format: csc, time: 0.30956602096557617 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 95, 905])), density: 1, format: arr, time: 0.004606008529663086 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([ 113, 9887])), density: 0.01, format: coo, time: 2.3150041103363037 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([ 113, 9887])), density: 0.01, format: csr, time: 2.228933095932007 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([ 113, 9887])), density: 0.01, format: csc, time: 2.2568271160125732 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([ 113, 9887])), density: 1, format: arr, time: 0.05763578414916992 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([1071, 8929])), density: 0.1, format: coo, time: 3.886691093444824 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([1071, 8929])), density: 0.1, format: csr, time: 3.8926799297332764 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([1071, 8929])), density: 0.1, format: csc, time: 4.220958948135376 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([1071, 8929])), density: 1, format: arr, time: 0.12191629409790039 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([ 977, 99023])), density: 0.01, format: coo, time: 32.011396169662476 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([ 977, 99023])), density: 0.01, format: csr, time: 32.23091101646423 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([ 977, 99023])), density: 0.01, format: csc, time: 33.7115478515625 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([ 977, 99023])), density: 1, format: arr, time: 0.7365210056304932 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([10023, 89977])), density: 0.1, format: coo, time: 43.57709002494812 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([10023, 89977])), density: 0.1, format: csr, time: 45.25056982040405 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([10023, 89977])), density: 0.1, format: csc, time: 99.44304585456848 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([10023, 89977])), density: 1, format: arr, time: 16.320679903030396 | |
''' | |
''' | |
VECTORIZED | |
---------- | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 7, 993])), density: 0.01, format: coo, time: 0.0032851696014404297 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 7, 993])), density: 0.01, format: csr, time: 0.0027887821197509766 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 7, 993])), density: 0.01, format: csc, time: 0.003389120101928711 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 7, 993])), density: 1, format: arr, time: 0.0014929771423339844 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 95, 905])), density: 0.1, format: coo, time: 0.0031461715698242188 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 95, 905])), density: 0.1, format: csr, time: 0.0027971267700195312 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 95, 905])), density: 0.1, format: csc, time: 0.0039000511169433594 | |
SMOTE: X shape: (1000, 10), y imbalance: (array([0, 1]), array([ 95, 905])), density: 1, format: arr, time: 0.005051136016845703 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([ 113, 9887])), density: 0.01, format: coo, time: 0.01177215576171875 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([ 113, 9887])), density: 0.01, format: csr, time: 0.008515119552612305 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([ 113, 9887])), density: 0.01, format: csc, time: 0.008799076080322266 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([ 113, 9887])), density: 1, format: arr, time: 0.018984079360961914 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([1071, 8929])), density: 0.1, format: coo, time: 0.05235886573791504 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([1071, 8929])), density: 0.1, format: csr, time: 0.04006767272949219 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([1071, 8929])), density: 0.1, format: csc, time: 0.04768490791320801 | |
SMOTE: X shape: (10000, 50), y imbalance: (array([0, 1]), array([1071, 8929])), density: 1, format: arr, time: 0.09134483337402344 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([ 977, 99023])), density: 0.01, format: coo, time: 0.06119227409362793 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([ 977, 99023])), density: 0.01, format: csr, time: 0.053843021392822266 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([ 977, 99023])), density: 0.01, format: csc, time: 0.08270120620727539 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([ 977, 99023])), density: 1, format: arr, time: 0.4651978015899658 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([10023, 89977])), density: 0.1, format: coo, time: 3.597799777984619 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([10023, 89977])), density: 0.1, format: csr, time: 3.4779179096221924 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([10023, 89977])), density: 0.1, format: csc, time: 3.8068392276763916 | |
SMOTE: X shape: (100000, 100), y imbalance: (array([0, 1]), array([10023, 89977])), density: 1, format: arr, time: 16.16456127166748 | |
''' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment