Last active
February 27, 2019 14:08
-
-
Save pierreglaser/dbad42c29fd618af50fccd3753020e46 to your computer and use it in GitHub Desktop.
benchmark of pairwise distances when using different backends
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
"""Wall clock time measurement of pairwise_distances""" | |
import os | |
import csv | |
import time | |
import itertools | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
from sklearn.externals.joblib import parallel_backend, Parallel, delayed | |
from sklearn.metrics import pairwise_distances | |
# Benchmark grid: every combination of the values below is timed once.
metrics = ['euclidean', 'manhattan', 'cosine']
backends = ['threading', 'loky']
n_jobs_list = [2, 4]
shapes = [(10000, 100)]

# Elapsed wall-clock time (seconds) of each run, appended in the iteration
# order of itertools.product over (shapes, metrics, backends, n_jobs_list).
results = []

# product() is consumed lazily; there is no need to materialize it in a list.
for s, metric, backend, n_jobs in itertools.product(
        shapes, metrics, backends, n_jobs_list):
    # Fresh random input of the requested shape for this configuration.
    X = np.random.randn(*s)
    with parallel_backend(backend=backend):
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring short intervals; time.time() can jump if the system
        # clock is adjusted during the run.
        t0 = time.perf_counter()
        pairwise_distances(X, metric=metric, n_jobs=n_jobs)
        total_time = time.perf_counter() - t0
    print("shape: {:>15} distance: {:>10}, backend: {:>10}, n_jobs {}: "
          " total time {:.3f}".format(str(s), metric, backend, n_jobs,
                                      total_time))
    results.append(total_time)
# Label every timing with the (shape, distance, backend, n_jobs) combination
# that produced it. The levels are given in the same order as the arguments
# of the itertools.product call that drove the benchmark loop.
level_values = [shapes, metrics, backends, n_jobs_list]
level_names = ['shape', 'distance', 'backend', 'n_jobs']
index = pd.MultiIndex.from_product(level_values, names=level_names)

# Turn the flat list of timings into a one-column ('time') DataFrame.
timings = pd.Series(results, index=index)
results = timings.to_frame('time')

# Bar chart of time against n_jobs, coloured by backend, with one column of
# panels per distance and one row per shape; the y axis is shared within
# each row.
long_form = results.reset_index()
sns.catplot(
    data=long_form,
    x='n_jobs',
    y='time',
    hue='backend',
    row='shape',
    col='distance',
    kind='bar',
    sharey='row',
)
plt.savefig('parallel_pairwise_plot.png')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment