Skip to content

Instantly share code, notes, and snippets.

@JefferyW
Last active January 8, 2020 22:38
Show Gist options
  • Save JefferyW/4a158f9800831a28f1b952dcb7dc3353 to your computer and use it in GitHub Desktop.
Save JefferyW/4a158f9800831a28f1b952dcb7dc3353 to your computer and use it in GitHub Desktop.
T-SNE visualisation tool
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.cm as cm
import numpy as np
from scipy.spatial import ConvexHull
from sklearn.mixture import GaussianMixture
from scipy import linalg
from sklearn.neighbors import NearestNeighbors
from sklearn.manifold import TSNE
import warnings
warnings.filterwarnings("ignore")
def convexHulls(points, labels, n_classes=10):
    """Compute one convex hull per class label.

    Args:
        points: 2-D array-like of coordinates, one row per sample.
        labels: 1-D array-like of per-row class labels; hulls are built for
            the integer labels ``0 .. n_classes - 1``.
        n_classes: Number of classes to process. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A list of ``scipy.spatial.ConvexHull`` objects where entry ``i`` is
        the hull of the rows labelled ``i``. Note that ``hull.vertices``
        indexes into the per-class subset ``points[labels == i]``, not into
        ``points`` itself.

    Raises:
        Whatever ``ConvexHull`` raises when a class has too few (or
        degenerate) points to form a hull -- see the SciPy documentation.
    """
    # Coerce to arrays so that ``labels == i`` is an element-wise mask even
    # when the caller passes plain Python lists.
    points = np.asarray(points)
    labels = np.asarray(labels)
    return [ConvexHull(points[labels == i, :]) for i in range(n_classes)]
def best_ellipses(points, labels, n_classes=10):
    """Fit a single full-covariance Gaussian to the points of each class.

    The fitted mean/covariance of each model is what ``plot_results`` later
    draws as the best-fitting ellipse for that class.

    Args:
        points: 2-D array-like of coordinates, one row per sample.
        labels: 1-D array-like of per-row class labels; a model is fitted for
            each integer label ``0 .. n_classes - 1``.
        n_classes: Number of classes to process. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A list of fitted ``GaussianMixture`` objects (``n_components=1``,
        ``covariance_type='full'``); entry ``i`` was fitted on the rows
        labelled ``i``.
    """
    # Coerce to arrays so that ``labels == i`` is an element-wise mask even
    # when the caller passes plain Python lists.
    points = np.asarray(points)
    labels = np.asarray(labels)
    return [
        GaussianMixture(n_components=1, covariance_type='full').fit(
            points[labels == i, :]
        )
        for i in range(n_classes)
    ]
def neighboring_hit(points, labels, k=6):
    """Return the neighbourhood-hit score of a labelled point set.

    For every point, look at its ``k`` nearest neighbours and measure the
    fraction that share the point's label; the score is the mean of that
    fraction over all points.

    Args:
        points: 2-D array-like of coordinates, one row per sample.
        labels: 1-D array-like of per-row labels. Labels are only compared
            for equality, so they need not be integers in ``0..9``.
        k: Number of neighbours inspected per point. Defaults to 6, the
            value that used to be hard-coded.

    Returns:
        A Python float in ``[0, 1]``.
    """
    labels = np.asarray(labels)

    # Ask for k + 1 neighbours: the query set equals the fitted set, so the
    # first column is expected to be the point itself and is discarded.
    # NOTE(review): with exact duplicate points the first hit may be the
    # duplicate rather than the point itself -- same as the original code.
    nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm='ball_tree').fit(points)
    _, indices = nbrs.kneighbors(points)

    # The former per-class accumulators were never returned and raised
    # ZeroDivisionError whenever one of the labels 0..9 was absent, so only
    # the overall score is computed here.
    same_label = labels[indices[:, 1:]] == labels[:, None]
    return float(same_label.mean())
def plot_results(X, Y_, means, covariances, index, title, color):
    """Scatter the points of each Gaussian component and overlay its ellipse.

    Everything is drawn on subplot ``(3, 1, 3)`` of the current figure.

    Args:
        X: 2-D array of points; columns 0 and 1 are plotted as x and y.
        Y_: Component index assigned to each row of ``X``.
        means: Iterable of component means (ellipse centres).
        covariances: Iterable of component covariance matrices, paired with
            ``means``.
        index: Unused; kept so existing callers keep working.
        title: Title set on the subplot.
        color: Colour used for both the scatter points and the ellipse.
    """
    splot = plt.subplot(3, 1, 3)
    for i, (mean, covar) in enumerate(zip(means, covariances)):
        v, w = linalg.eigh(covar)
        # Scale the square roots of the eigenvalues into ellipse axis lengths.
        v = 2. * np.sqrt(2.) * np.sqrt(v)
        u = w[0] / linalg.norm(w[0])
        # Skip components that no point was assigned to.
        if not np.any(Y_ == i):
            continue
        plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color, alpha=0.2)

        # Plot an ellipse to show the Gaussian component.
        angle = np.arctan(u[1] / u[0])
        angle = 180. * angle / np.pi  # convert to degrees
        # ``angle`` must be passed by keyword: recent Matplotlib releases
        # no longer accept it as the fourth positional argument.
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180. + angle,
                                  color=color)
        ell.set_clip_box(splot.bbox)
        ell.set_alpha(0.6)
        splot.add_artist(ell)
    plt.title(title)
def visualization(points2D, labels, convex_hulls, ellipses, projname, nh):
    """Draw a three-panel summary figure, save it as a PNG and show it.

    Panels, top to bottom: the labelled scatter plot, the per-class convex
    hull outlines, and the per-class fitted ellipses.

    Args:
        points2D: 2-D array of projected points; columns 0 and 1 are x and y.
        labels: Per-row class labels; classes 0..9 are drawn.
        convex_hulls: Sequence indexed by class whose items expose
            ``.vertices`` indexing into that class's subset of ``points2D``.
        ellipses: Sequence indexed by class of fitted models exposing
            ``predict``, ``means_`` and ``covariances_``.
        projname: Projection name; used in titles and as the PNG file stem.
        nh: Neighbourhood-hit score; shown multiplied by 100 in the title.
    """
    class_ids = range(10)
    per_class = [points2D[labels == c, :] for c in class_ids]

    # Figure-wide setup.
    palette = cm.tab10
    plt.figure(figsize=(3.841, 7.195), dpi=100)
    plt.set_cmap(palette)
    plt.subplots_adjust(hspace=0.4)

    # Panel 1: every point, coloured by its label.
    plt.subplot(311)
    xs, ys = points2D[:, 0], points2D[:, 1]
    plt.scatter(xs, ys, c=labels, s=3, edgecolors='none', cmap=palette,
                alpha=1.0)
    plt.colorbar(ticks=range(10))
    scatter_title = "2D " + projname + " - NH=" + str(nh * 100.0)
    plt.title(scatter_title)

    # Position of each class along the colormap.
    fractions = [c / 10.0 for c in class_ids]

    # Panel 2: one closed hull outline per class.
    hull_axes = plt.subplot(312)
    for c in class_ids:
        hull = convex_hulls[c]
        # Repeat the first vertex at the end so the outline closes.
        outline = np.append(hull.vertices, hull.vertices[0])
        members = per_class[c]
        hull_axes.plot(members[outline, 0], members[outline, 1], '-',
                       label='$%i$' % c, color=palette(fractions[c]))
    plt.colorbar(ticks=range(10))
    plt.title(projname + " Convex Hulls")

    # Panel 3: one fitted ellipse per class.
    plt.subplot(313)
    for c in class_ids:
        members = per_class[c]
        model = ellipses[c]
        plot_results(members, model.predict(members), model.means_,
                     model.covariances_, 0, projname + " fitting ellipses",
                     palette(fractions[c]))

    plt.savefig(projname + ".png", dpi=100)
    plt.show()
# Example usage #
# NOTE: `y_pred` and `y_test` are not defined in this file and must be
# supplied before running. Presumably `y_pred` is the feature matrix to embed
# and `y_test` holds the integer class labels (0..9) -- confirm against the
# calling context.

# Train t-SNE and project the input down to 2-D.
tsne = TSNE(init='pca', perplexity=30, verbose=2)
x2d = tsne.fit_transform(y_pred)

# Visualisation: takes the 2-D matrix and the labels.
# The hulls are built from `x2d`; the previous `x2d_` was an undefined name.
convex_hulls = convexHulls(x2d, y_test)
ellipses = best_ellipses(x2d, y_test)
neighborhit = neighboring_hit(x2d, y_test)
visualization(x2d, y_test, convex_hulls, ellipses, 'filename', neighborhit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment