Skip to content

Instantly share code, notes, and snippets.

@pierreglaser
Last active February 27, 2019 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pierreglaser/dbad42c29fd618af50fccd3753020e46 to your computer and use it in GitHub Desktop.
Save pierreglaser/dbad42c29fd618af50fccd3753020e46 to your computer and use it in GitHub Desktop.
Benchmark of pairwise distances when using different backends
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Wall clock time measurement of pairwise_distances"""
import os
import csv
import time
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.externals.joblib import parallel_backend, Parallel, delayed
from sklearn.metrics import pairwise_distances
# Benchmark grid: every combination of shape, metric, backend and n_jobs
# below is timed exactly once.
metrics = ['euclidean', 'manhattan', 'cosine']
backends = ['threading', 'loky']
n_jobs_list = [2, 4]
shapes = [(10000, 100)]

# Elapsed seconds per configuration, appended in itertools.product order
# (shape varies slowest, n_jobs fastest).
results = []
for s, metric, backend, n_jobs in itertools.product(
        shapes, metrics, backends, n_jobs_list):
    # Fresh random input for each configuration, generated outside the
    # timed region so that data creation is not measured.
    X = np.random.randn(*s)
    with parallel_backend(backend=backend):
        # perf_counter() is the clock meant for measuring short durations:
        # unlike time.time() it is monotonic, so system clock adjustments
        # cannot distort the measurement.
        t0 = time.perf_counter()
        pairwise_distances(X, metric=metric, n_jobs=n_jobs)
        total_time = time.perf_counter() - t0
    print("shape: {:>15} distance: {:>10}, backend: {:>10}, n_jobs {}: "
          " total time {:.3f}".format(str(s), metric, backend, n_jobs,
                                      total_time))
    results.append(total_time)
# Label every timing with the configuration that produced it. This relies on
# MultiIndex.from_product enumerating combinations in the same order in which
# the timings were appended to `results`.
level_names = ['shape', 'distance', 'backend', 'n_jobs']
index = pd.MultiIndex.from_product(
    [shapes, metrics, backends, n_jobs_list],
    names=level_names,
)
results = pd.Series(results, index=index).to_frame('time')

# Bar charts of time against n_jobs, coloured by backend, laid out with one
# row per shape and one column per distance; the y axis is shared per row.
sns.catplot(
    data=results.reset_index(),
    x='n_jobs',
    y='time',
    hue='backend',
    row='shape',
    col='distance',
    kind='bar',
    sharey='row',
)
plt.savefig('parallel_pairwise_plot.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment