# Gist by @jfsantos (jfsantos/8184653991558e30a9eab8613a6ea20f), created April 5, 2017.
# Times three ways of summing all pairwise Euclidean distances of a point set:
# scipy's pdist, a plain Python loop, and a multiprocessing Pool.
from __future__ import division
import multiprocessing
import scipy.spatial.distance
import numpy as np
import sklearn.datasets
from time import time
from multiprocessing import Pool
from itertools import combinations
# Generate some data ###########################################################
N = 100
# Three cluster centers placed at the corners of an equilateral triangle with
# side length 1.
centers = [[0, 0], [1, 0], [0.5, np.sqrt(0.75)]]
# The SDs:
cluster_std = [0.3, 0.3, 0.3]
n_clusters = len(centers)

# 75% of the points are drawn from the mixture of all three clusters.
n_samples = int(0.75 * N)
data, data_labels = sklearn.datasets.make_blobs(
    n_samples=n_samples, centers=centers, cluster_std=cluster_std)

# The remaining 25% are drawn around the third center only, so that cluster
# ends up over-represented.  The mixture configuration above is reused rather
# than overwritten, so `centers`, `cluster_std` and `n_clusters` keep
# describing the full data set.
extra_cluster = 2
n_extra = int(0.25 * N)
extra = sklearn.datasets.make_blobs(
    n_samples=n_extra,
    centers=[centers[extra_cluster]],
    cluster_std=[cluster_std[extra_cluster]])[0]

X = np.concatenate((data, extra), axis=0)
# Keep one label per row of X: the labels of the mixture draw followed by the
# index of the over-represented cluster for every extra point.
labels_true = np.concatenate(
    (data_labels, np.full(n_extra, extra_cluster, dtype=data_labels.dtype)))
################################################################################
# Now do it the scipy way ######################################################
# Baseline: let scipy compute every pairwise distance in one call.  Only the
# pdist call is guarded, since it is the step that allocates the full set of
# pairwise distances at once.
t = time()
try:
    Y = scipy.spatial.distance.pdist(X, 'euclidean')
except MemoryError:
    # Best effort: report the skipped baseline instead of hiding it silently.
    print('pdist skipped: not enough memory to hold all pairwise distances')
else:
    print(Y.sum())
    print('{} s'.format(time() - t))
################################################################################
# Now the way I want to but without multiprocessing ############################
def calculate_pairwise_distance(a, b):
    """Return the Euclidean distance between the points ``a`` and ``b``.

    Parameters
    ----------
    a, b : array_like
        Coordinates of the two points.  Plain sequences (lists, tuples) are
        accepted as well as numpy arrays; the shapes must be compatible for
        subtraction.

    Returns
    -------
    float
        The norm of ``a - b`` as computed by ``np.linalg.norm``.
    """
    # asarray is a no-op for ndarrays and lets list/tuple inputs be subtracted.
    return np.linalg.norm(np.asarray(a) - np.asarray(b))
# Serial reference: visit every unordered pair of rows of X once and
# accumulate the distances in a plain Python loop.
t = time()
comb_sum = 0
# Iterating over X yields its rows, so combinations(X, 2) produces the row
# pairs directly, in the same order as pairing up the row indices would.
for first, second in combinations(X, 2):
    comb_sum += calculate_pairwise_distance(first, second)
print(comb_sum)
print('{} s'.format(time() - t))
################################################################################
# And finally the way I want to with multiprocessing ###########################
def calculate_pairwise_distance_tuple(x):
    """Return the Euclidean distance between the two points packed in ``x``.

    Takes a single argument so it can be handed straight to ``Pool.map``,
    which passes exactly one item per call.

    Parameters
    ----------
    x : sequence
        ``x[0]`` and ``x[1]`` are the two points (array_like, with shapes
        compatible for subtraction).  Any further elements are ignored.

    Returns
    -------
    float
        The norm of ``x[0] - x[1]`` as computed by ``np.linalg.norm``.
    """
    # asarray is a no-op for ndarrays and lets list/tuple points be subtracted.
    return np.linalg.norm(np.asarray(x[0]) - np.asarray(x[1]))
# Worker processes may re-import this module (that is how the "spawn" start
# method works), so the pool must only be created when the file is run as a
# script -- otherwise every worker would try to start a pool of its own.
if __name__ == '__main__':
    p = Pool(4)
    try:
        # The timer starts after the pool exists, so worker start-up is not
        # part of the measurement.
        t = time()
        # Each task carries its own copies of the two rows to compare.
        combs = [(first.copy(), second.copy())
                 for first, second in combinations(X, 2)]
        results = p.map(calculate_pairwise_distance_tuple, combs)
        print(sum(results))
        print('{} s'.format(time() - t))
    finally:
        # Always shut the workers down, even if the benchmark raised.
        p.close()
        p.join()
# End of gist. (The GitHub sign-in prompt from the page footer is not part of the script.)