Created
November 10, 2020 11:34
-
-
Save svenvanhal/015c031325a463aa726529523df255a1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
from isotree import IsolationForest as IsoTreeIsolationForest | |
from sklearn.datasets import make_classification | |
from sklearn.ensemble import IsolationForest as ScikitLearnIsolationForest | |
from sklearn.model_selection import train_test_split | |
def isotree_test(X1, X2, params):
    """Time one fit/predict cycle of the isotree isolation forest.

    The model is built with ``ntrees=25``, ``nthreads=-1`` and
    ``random_seed=259``; ``params`` supplies any further constructor
    keyword arguments.

    Args:
        X1: Data passed to ``fit``.
        X2: Data passed to ``predict``.
        params: Dict of extra keyword arguments for the estimator.

    Returns:
        A ``(fit_duration, predict_duration)`` tuple of
        ``time.perf_counter()`` differences.
    """
    model = IsoTreeIsolationForest(ntrees=25, nthreads=-1, random_seed=259, **params)

    fit_began = time.perf_counter()
    model.fit(X1)
    # A single clock reading marks both the end of fitting and the start
    # of prediction, so nothing else is attributed to either phase.
    fit_ended = time.perf_counter()

    model.predict(X2)
    predict_ended = time.perf_counter()

    fit_duration = fit_ended - fit_began
    predict_duration = predict_ended - fit_ended
    return fit_duration, predict_duration
def sklearn_test(X1, X2, params):
    """Time one fit/score cycle of scikit-learn's isolation forest.

    The model is built with ``n_estimators=25``, ``n_jobs=-1`` and
    ``random_state=259``; ``params`` supplies any further constructor
    keyword arguments.

    Args:
        X1: Data passed to ``fit``.
        X2: Data passed to ``score_samples``.
        params: Dict of extra keyword arguments for the estimator.

    Returns:
        A ``(fit_duration, predict_duration)`` tuple of
        ``time.perf_counter()`` differences.
    """
    model = ScikitLearnIsolationForest(n_estimators=25, n_jobs=-1, random_state=259, **params)

    fit_began = time.perf_counter()
    model.fit(X1)
    # A single clock reading marks both the end of fitting and the start
    # of scoring, so nothing else is attributed to either phase.
    fit_ended = time.perf_counter()

    model.score_samples(X2)
    scoring_ended = time.perf_counter()

    fit_duration = fit_ended - fit_began
    predict_duration = scoring_ended - fit_ended
    return fit_duration, predict_duration
def run_experiment(X1, X2, max_samples):
    """Benchmark the three forest variants for one sub-sample size and print the timings.

    Prints a heading with ``max_samples``, a column header, one timing row
    per variant and a trailing blank line.

    Args:
        X1: Data each model is fitted on.
        X2: Data each model is scored on.
        max_samples: Value forwarded as ``max_samples`` to the scikit-learn
            model and as ``sample_size`` to the isotree models.
    """
    print(f"max_samples: {max_samples}")
    print(" Fit time Predict time")

    # (row label, timing function, estimator keyword arguments), listed in
    # the order the rows are printed.
    contenders = (
        # Scikit-Learn IsolationForest
        ('[Scikit-Learn / IF]', sklearn_test, {'max_samples': max_samples}),
        # IsoTree IsolationForest (ndim=1)
        ('[IsoTree / IF]', isotree_test, {'ndim': 1, 'sample_size': max_samples}),
        # IsoTree Extended Isolation Forest (ndim=2)
        ('[IsoTree / EIF]', isotree_test, {'ndim': 2, 'sample_size': max_samples}),
    )

    for label, timing_fn, estimator_kwargs in contenders:
        fit_seconds, predict_seconds = timing_fn(X1, X2, estimator_kwargs)
        print(f'{label} {fit_seconds: >10.1f}s {predict_seconds: >12.1f}s')

    print()
if __name__ == "__main__":
    # Script entry point: for each dataset size, generate synthetic data,
    # split it into a fit part and a predict part, and benchmark every
    # sub-sample size on it.

    # Total number of generated rows per benchmark round.
    dataset_sizes = (100_000, 1_000_000)
    # Sub-sample sizes benchmarked on every dataset, in print order.
    sample_sizes = (256, 2_048, 16_384, 65_536)

    for round_index, n_rows in enumerate(dataset_sizes):
        if round_index:
            # Visual separator between consecutive dataset rounds.
            print('---\n')

        X, _ = make_classification(n_rows, n_features=10, n_informative=10, n_redundant=0, random_state=42)
        # Seed the split too: data generation and both models are already
        # seeded, so an unseeded shuffle here was the only thing making the
        # fit/predict partition differ from run to run.
        X1, X2 = train_test_split(X, random_state=42)

        print(f"# samples fit: {len(X1):,}")
        print(f"# samples predict: {len(X2):,}\n")

        for max_samples in sample_sizes:
            run_experiment(X1, X2, max_samples=max_samples)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment