Skip to content

Instantly share code, notes, and snippets.

@svenvanhal
Created November 10, 2020 11:34
Show Gist options
  • Save svenvanhal/015c031325a463aa726529523df255a1 to your computer and use it in GitHub Desktop.
Save svenvanhal/015c031325a463aa726529523df255a1 to your computer and use it in GitHub Desktop.
import time
from isotree import IsolationForest as IsoTreeIsolationForest
from sklearn.datasets import make_classification
from sklearn.ensemble import IsolationForest as ScikitLearnIsolationForest
from sklearn.model_selection import train_test_split
def isotree_test(X1, X2, params):
    """Time one fit/predict cycle of an IsoTree isolation forest.

    Args:
        X1: Data passed to ``fit``.
        X2: Data passed to ``predict``.
        params: Extra keyword arguments forwarded to the IsoTree
            ``IsolationForest`` constructor, on top of the fixed
            ``ntrees=25``, ``nthreads=-1`` and ``random_seed=259``.

    Returns:
        A ``(fit_seconds, predict_seconds)`` tuple of elapsed
        ``time.perf_counter()`` durations.
    """
    # Model construction is deliberately kept outside the timed region.
    model = IsoTreeIsolationForest(
        ntrees=25,
        nthreads=-1,
        random_seed=259,
        **params,
    )

    fit_began = time.perf_counter()
    model.fit(X1)
    # A single timestamp marks both the end of fitting and the start of
    # prediction, so the two durations are contiguous.
    fit_ended = time.perf_counter()
    model.predict(X2)
    predict_ended = time.perf_counter()

    fit_seconds = fit_ended - fit_began
    predict_seconds = predict_ended - fit_ended
    return fit_seconds, predict_seconds
def sklearn_test(X1, X2, params):
    """Time one fit/score cycle of scikit-learn's IsolationForest.

    Args:
        X1: Data passed to ``fit``.
        X2: Data passed to ``score_samples``.
        params: Extra keyword arguments forwarded to the scikit-learn
            ``IsolationForest`` constructor, on top of the fixed
            ``n_estimators=25``, ``n_jobs=-1`` and ``random_state=259``.

    Returns:
        A ``(fit_seconds, predict_seconds)`` tuple of elapsed
        ``time.perf_counter()`` durations.
    """
    # Build the estimator before starting the clock so only fit/score
    # work is measured.
    forest = ScikitLearnIsolationForest(
        n_estimators=25,
        n_jobs=-1,
        random_state=259,
        **params,
    )

    clock = time.perf_counter
    started = clock()
    forest.fit(X1)
    fitted = clock()  # end of fit == start of scoring
    forest.score_samples(X2)
    scored = clock()

    return fitted - started, scored - fitted
def run_experiment(X1, X2, max_samples):
    """Benchmark all three forest variants for one sub-sample size.

    Prints a small table with one row per implementation: scikit-learn's
    IsolationForest, IsoTree with ``ndim=1`` (plain isolation forest) and
    IsoTree with ``ndim=2`` (extended isolation forest).

    Args:
        X1: Data used to fit every model.
        X2: Data used to time prediction/scoring.
        max_samples: Per-tree sub-sample size. Passed as ``max_samples``
            to scikit-learn and as ``sample_size`` to IsoTree.

    Returns:
        None. Results are written to stdout.
    """
    # (row label, timing function, constructor kwargs) for each contender.
    benchmarks = (
        ("[Scikit-Learn / IF]", sklearn_test, {"max_samples": max_samples}),
        ("[IsoTree / IF]", isotree_test, {"ndim": 1, "sample_size": max_samples}),
        ("[IsoTree / EIF]", isotree_test, {"ndim": 2, "sample_size": max_samples}),
    )
    # Labels differ in length; pad them to a common width so the numeric
    # columns line up underneath the header.
    label_width = max(len(label) for label, _, _ in benchmarks)

    print(f"max_samples: {max_samples}")
    # Header widths mirror the data rows: a 10-wide number plus the "s"
    # suffix is 11 characters, a 12-wide number plus "s" is 13.
    print(f"{'':<{label_width}} {'Fit time':>11} {'Predict time':>13}")

    for label, bench, params in benchmarks:
        t_fit, t_predict = bench(X1, X2, params)
        # Print each row as soon as it is measured so progress stays visible
        # during long runs.
        print(f"{label:<{label_width}} {t_fit: >10.1f}s {t_predict: >12.1f}s")

    print()
if __name__ == "__main__":
    # Benchmark driver: for each dataset size, generate synthetic data,
    # split it into a fit set and a predict set, and time every
    # implementation across a range of per-tree sub-sample sizes.
    dataset_sizes = (100_000, 1_000_000)
    subsample_sizes = (256, 2_048, 16_384, 65_536)

    for position, n_rows in enumerate(dataset_sizes):
        if position:
            # Visual separator between the reports of successive datasets.
            print('---\n')

        X, _ = make_classification(
            n_rows,
            n_features=10,
            n_informative=10,
            n_redundant=0,
            random_state=42,
        )
        # Seed the split as well: every other source of randomness in this
        # script is seeded, and an unseeded split would hand different
        # fit/predict rows to the models on every run.
        X1, X2 = train_test_split(X, random_state=42)

        print(f"# samples fit: {len(X1):,}")
        print(f"# samples predict: {len(X2):,}\n")

        for size in subsample_sizes:
            run_experiment(X1, X2, max_samples=size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment