svenvanhal/isotree_benchmark.py

## isotree_benchmark.py
import time

from isotree import IsolationForest as IsoTreeIsolationForest
from sklearn.datasets import make_classification
from sklearn.ensemble import IsolationForest as ScikitLearnIsolationForest
from sklearn.model_selection import train_test_split


def isotree_test(X1, X2, params):
    """Time one fit/predict cycle of the isotree isolation forest.

    The forest is created with ``ntrees=25``, ``nthreads=-1`` and
    ``random_seed=259``; every entry of ``params`` is forwarded to the
    constructor as an additional keyword argument. Construction happens
    before the first timestamp, so it is not part of either duration.

    Args:
        X1: Data handed to ``fit``.
        X2: Data handed to ``predict``.
        params: Mapping of extra constructor keyword arguments.

    Returns:
        A ``(fit_seconds, predict_seconds)`` tuple measured with
        ``time.perf_counter``.
    """
    forest = IsoTreeIsolationForest(ntrees=25, nthreads=-1, random_seed=259, **params)

    fit_began = time.perf_counter()
    forest.fit(X1)
    fit_ended = time.perf_counter()

    # The prediction output is discarded; only the elapsed time matters.
    forest.predict(X2)
    predict_ended = time.perf_counter()

    fit_seconds = fit_ended - fit_began
    predict_seconds = predict_ended - fit_ended
    return fit_seconds, predict_seconds


def sklearn_test(X1, X2, params):
    """Time one fit/score cycle of scikit-learn's isolation forest.

    The forest is created with ``n_estimators=25``, ``n_jobs=-1`` and
    ``random_state=259``; every entry of ``params`` is forwarded to the
    constructor as an additional keyword argument. Construction happens
    before the first timestamp, so it is not part of either duration.

    Args:
        X1: Data handed to ``fit``.
        X2: Data handed to ``score_samples``.
        params: Mapping of extra constructor keyword arguments.

    Returns:
        A ``(fit_seconds, predict_seconds)`` tuple measured with
        ``time.perf_counter``.
    """
    forest = ScikitLearnIsolationForest(n_estimators=25, n_jobs=-1, random_state=259, **params)

    fit_began = time.perf_counter()
    forest.fit(X1)
    fit_ended = time.perf_counter()

    # The scores are discarded; only the elapsed time matters.
    forest.score_samples(X2)
    scoring_ended = time.perf_counter()

    fit_seconds = fit_ended - fit_began
    predict_seconds = scoring_ended - fit_ended
    return fit_seconds, predict_seconds


def run_experiment(X1, X2, max_samples):
    """Benchmark all three forest variants for one sample size and print a table.

    The variants run in a fixed order: scikit-learn, isotree with ``ndim=1``,
    then isotree with ``ndim=2``. Each result row is printed as soon as its
    measurement finishes, and a blank line closes the table.

    Args:
        X1: Data used for fitting.
        X2: Data used for prediction/scoring.
        max_samples: Passed to scikit-learn as ``max_samples`` and to isotree
            as ``sample_size``.
    """
    print(f"max_samples: {max_samples}")
    print("                       Fit time  Predict time")

    # (row prefix, timing function, constructor settings); the prefixes carry
    # their own trailing padding so the numeric columns line up.
    benchmarks = (
        # Scikit-Learn IsolationForest
        ('[Scikit-Learn / IF] ', sklearn_test, {'max_samples': max_samples}),
        # IsoTree IsolationForest (ndim=1)
        ('[IsoTree / IF]      ', isotree_test, {'ndim': 1, 'sample_size': max_samples}),
        # IsoTree Extended Isolation Forest (ndim=2)
        ('[IsoTree / EIF]     ', isotree_test, {'ndim': 2, 'sample_size': max_samples}),
    )

    for row_prefix, timer, settings in benchmarks:
        fit_s, predict_s = timer(X1, X2, settings)
        print(f'{row_prefix}{fit_s: >10.1f}s {predict_s: >12.1f}s')

    print()


if __name__ == "__main__":
    # Total number of generated rows per benchmark round, and the values
    # passed to run_experiment as max_samples within each round.
    dataset_sizes = (100_000, 1_000_000)
    sample_sizes = (256, 2_048, 16_384, 65_536)

    for round_index, n_rows in enumerate(dataset_sizes):
        if round_index:
            # Separator between the reports of consecutive dataset sizes.
            print('---\n')

        X, _ = make_classification(n_rows, n_features=10, n_informative=10, n_redundant=0, random_state=42)
        # Seed the split too: data generation and both forests are already
        # seeded, so an unseeded split was the only thing making repeated
        # runs of this script time different fit/predict partitions.
        X1, X2 = train_test_split(X, random_state=42)

        print(f"# samples fit:     {len(X1):,}")
        print(f"# samples predict: {len(X2):,}\n")

        for max_samples in sample_sizes:
            run_experiment(X1, X2, max_samples=max_samples)
	import time

	from isotree import IsolationForest as IsoTreeIsolationForest
	from sklearn.datasets import make_classification
	from sklearn.ensemble import IsolationForest as ScikitLearnIsolationForest
	from sklearn.model_selection import train_test_split


	def isotree_test(X1, X2, params):
	    """Measure how long the isotree isolation forest takes to fit and predict.

	    The model is built with ``ntrees=25``, ``nthreads=-1`` and
	    ``random_seed=259`` plus whatever keyword arguments ``params`` holds.
	    It is fitted on ``X1`` and then asked to predict on ``X2``.

	    Returns:
	        ``(fit_seconds, predict_seconds)`` as differences of
	        ``time.perf_counter`` readings.
	    """
	    model = IsoTreeIsolationForest(ntrees=25, nthreads=-1, random_seed=259, **params)

	    clock_before_fit = time.perf_counter()
	    model.fit(X1)
	    clock_after_fit = time.perf_counter()
	    model.predict(X2)  # result intentionally unused
	    clock_after_predict = time.perf_counter()

	    fit_seconds = clock_after_fit - clock_before_fit
	    predict_seconds = clock_after_predict - clock_after_fit
	    return fit_seconds, predict_seconds


	def sklearn_test(X1, X2, params):
	    """Measure how long scikit-learn's isolation forest takes to fit and score.

	    The model is built with ``n_estimators=25``, ``n_jobs=-1`` and
	    ``random_state=259`` plus whatever keyword arguments ``params`` holds.
	    It is fitted on ``X1`` and then scores ``X2`` via ``score_samples``.

	    Returns:
	        ``(fit_seconds, predict_seconds)`` as differences of
	        ``time.perf_counter`` readings.
	    """
	    model = ScikitLearnIsolationForest(n_estimators=25, n_jobs=-1, random_state=259, **params)

	    clock_before_fit = time.perf_counter()
	    model.fit(X1)
	    clock_after_fit = time.perf_counter()
	    model.score_samples(X2)  # result intentionally unused
	    clock_after_scoring = time.perf_counter()

	    fit_seconds = clock_after_fit - clock_before_fit
	    predict_seconds = clock_after_scoring - clock_after_fit
	    return fit_seconds, predict_seconds


	def run_experiment(X1, X2, max_samples):
	    """Benchmark the three forest variants for one sample size and print a table.

	    Runs scikit-learn first, then isotree with ``ndim=1``, then isotree with
	    ``ndim=2``; each row is printed right after its measurement and a blank
	    line ends the table.

	    Args:
	        X1: Data used for fitting.
	        X2: Data used for prediction/scoring.
	        max_samples: Passed to scikit-learn as ``max_samples`` and to isotree
	            as ``sample_size``.
	    """
	    print(f"max_samples: {max_samples}")
	    # The header is padded so "Fit time" / "Predict time" sit above the
	    # 10- and 12-character numeric columns of the rows below.
	    print("                       Fit time  Predict time")

	    # Row labels are padded to one common width; without the padding the
	    # numbers of the three rows start at different columns.
	    # Scikit-Learn IsolationForest
	    t_fit, t_predict = sklearn_test(X1, X2, {'max_samples': max_samples})
	    print(f'[Scikit-Learn / IF] {t_fit: >10.1f}s {t_predict: >12.1f}s')

	    # IsoTree IsolationForest (ndim=1)
	    t_fit, t_predict = isotree_test(X1, X2, {'ndim': 1, 'sample_size': max_samples})
	    print(f'[IsoTree / IF]      {t_fit: >10.1f}s {t_predict: >12.1f}s')

	    # IsoTree Extended Isolation Forest (ndim=2)
	    t_fit, t_predict = isotree_test(X1, X2, {'ndim': 2, 'sample_size': max_samples})
	    print(f'[IsoTree / EIF]     {t_fit: >10.1f}s {t_predict: >12.1f}s')

	    print()


	if __name__ == "__main__":
	    # Total number of generated rows per benchmark round, and the values
	    # passed to run_experiment as max_samples within each round.
	    dataset_sizes = (100_000, 1_000_000)
	    sample_sizes = (256, 2_048, 16_384, 65_536)

	    for round_index, n_rows in enumerate(dataset_sizes):
	        if round_index:
	            # Separator between the reports of consecutive dataset sizes.
	            print('---\n')

	        X, _ = make_classification(n_rows, n_features=10, n_informative=10, n_redundant=0, random_state=42)
	        # Seed the split too: data generation and both forests are already
	        # seeded, so an unseeded split was the only thing making repeated
	        # runs of this script time different fit/predict partitions.
	        X1, X2 = train_test_split(X, random_state=42)

	        print(f"# samples fit: {len(X1):,}")
	        print(f"# samples predict: {len(X2):,}\n")

	        for max_samples in sample_sizes:
	            run_experiment(X1, X2, max_samples=max_samples)