Last active
January 8, 2020 22:38
-
-
Save JefferyW/4a158f9800831a28f1b952dcb7dc3353 to your computer and use it in GitHub Desktop.
T-SNE visualisation tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import matplotlib as mpl | |
import matplotlib.cm as cm | |
import numpy as np | |
from scipy.spatial import ConvexHull | |
from sklearn.mixture import GaussianMixture | |
from scipy import linalg | |
from sklearn.neighbors import NearestNeighbors | |
from sklearn.manifold import TSNE | |
import warnings | |
warnings.filterwarnings("ignore") | |
def convexHulls(points, labels, n_classes=10):
    """Compute one convex hull per class label.

    Args:
        points: array whose rows are the points; rows are selected with a
            boolean mask built from ``labels``.
        labels: class label of each row of ``points``. Classes are expected
            to be the integers ``0 .. n_classes - 1``.
        n_classes: number of classes to build hulls for. Defaults to 10.

    Returns:
        A list of ``scipy.spatial.ConvexHull`` objects; element ``i`` is the
        hull of the rows whose label equals ``i``. Each hull is built from
        the per-class subset only, so its ``vertices`` index into that
        subset rather than into ``points``.
    """
    # Coerce to arrays so that ``labels == i`` is an element-wise mask even
    # when a plain Python list is passed in.
    points = np.asarray(points)
    labels = np.asarray(labels)
    return [ConvexHull(points[labels == i, :]) for i in range(n_classes)]
def best_ellipses(points, labels, n_classes=10):
    """Fit a single-component Gaussian to the points of each class.

    Args:
        points: array whose rows are the points; rows are selected with a
            boolean mask built from ``labels``.
        labels: class label of each row of ``points``. Classes are expected
            to be the integers ``0 .. n_classes - 1``.
        n_classes: number of classes to fit. Defaults to 10.

    Returns:
        A list of fitted ``GaussianMixture`` models (one component, full
        covariance); element ``i`` was fitted on the rows whose label
        equals ``i``.
    """
    # Coerce to arrays so that ``labels == i`` is an element-wise mask even
    # when a plain Python list is passed in.
    points = np.asarray(points)
    labels = np.asarray(labels)
    return [
        GaussianMixture(n_components=1, covariance_type='full').fit(
            points[labels == i, :]
        )
        for i in range(n_classes)
    ]
def neighboring_hit(points, labels, k=6):
    """Return the neighbourhood hit of a labelled point set.

    For every point, the fraction of its ``k`` nearest neighbours that carry
    the same label is computed; the result is the mean of that fraction over
    all points.

    Args:
        points: array whose rows are the points to analyse.
        labels: one label per row of ``points``.
        k: number of neighbours inspected per point. Defaults to 6.

    Returns:
        The mean same-label neighbour fraction as a Python float in
        ``[0.0, 1.0]``.
    """
    labels = np.asarray(labels)
    # Query k + 1 neighbours because the query set is the fitted set:
    # column 0 is skipped on the expectation that it is the point itself.
    nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm='ball_tree').fit(points)
    _, indices = nbrs.kneighbors(points)
    # Compare each point's label against the labels of its k neighbours.
    same_label = labels[indices[:, 1:]] == labels[:, np.newaxis]
    # Every row has exactly k entries, so the overall mean equals the mean of
    # the per-point fractions.
    return float(same_label.mean())
def plot_results(X, Y_, means, covariances, index, title, color):
    """Scatter the points of each Gaussian component and overlay its ellipse.

    Everything is drawn on the third row of a 3x1 subplot grid of the
    current figure.

    Args:
        X: array of points; columns 0 and 1 are used as x and y.
        Y_: component index assigned to each row of ``X``.
        means: iterable of component centres.
        covariances: iterable of component covariance matrices, paired with
            ``means``.
        index: unused; kept so existing callers keep working.
        title: title set on the subplot.
        color: colour used for both the scatter points and the ellipses.
    """
    splot = plt.subplot(3, 1, 3)
    for i, (mean, covar) in enumerate(zip(means, covariances)):
        # as the DP will not use every component it has access to
        # unless it needs it, we shouldn't plot the redundant
        # components.
        if not np.any(Y_ == i):
            continue

        # Eigenvalues give the ellipse axis lengths, eigenvectors its
        # orientation.
        v, w = linalg.eigh(covar)
        v = 2. * np.sqrt(2.) * np.sqrt(v)
        u = w[0] / linalg.norm(w[0])

        plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color, alpha=0.2)

        # Plot an ellipse to show the Gaussian component
        angle = np.arctan(u[1] / u[0])
        angle = 180. * angle / np.pi  # convert to degrees
        # ``angle`` is passed by keyword: recent matplotlib releases no
        # longer accept it positionally.
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180. + angle,
                                  color=color)
        ell.set_clip_box(splot.bbox)
        ell.set_alpha(0.6)
        splot.add_artist(ell)
    plt.title(title)
def visualization(points2D, labels, convex_hulls, ellipses, projname, nh):
    """Draw a three-panel summary of a labelled 2-D projection.

    The panels, top to bottom, are: a scatter plot coloured by label, the
    per-class convex hull outlines, and the per-class fitted ellipses. The
    figure is saved to ``projname + ".png"`` and then shown.

    Args:
        points2D: array of projected points; columns 0 and 1 are x and y.
        labels: class label of each row of ``points2D``. Ten classes
            labelled ``0 .. 9`` are assumed.
        convex_hulls: sequence of ten hull objects; ``vertices`` of hull
            ``i`` must index into the rows of ``points2D`` labelled ``i``.
        ellipses: sequence of ten fitted models exposing ``predict``,
            ``means_`` and ``covariances_``.
        projname: projection name, used in the titles and the output file
            name.
        nh: neighbourhood-hit score; shown in the first title as
            ``nh * 100``.
    """
    labels = np.asarray(labels)
    points2D_c = [points2D[labels == i, :] for i in range(10)]

    # Data Visualization
    cmap = cm.tab10
    plt.figure(figsize=(3.841, 7.195), dpi=100)
    plt.set_cmap(cmap)
    plt.subplots_adjust(hspace=0.4)

    # Panel 1: scatter coloured by label.
    sp1 = plt.subplot(311)
    scatter = sp1.scatter(points2D[:, 0], points2D[:, 1], c=labels, s=3,
                          edgecolors='none', cmap=cmap, alpha=1.0)
    plt.colorbar(scatter, ax=sp1, ticks=range(10))
    sp1.set_title("2D " + projname + " - NH=" + str(nh * 100.0))

    # Colormap positions used to pick one colour per class.
    vals = [i / 10.0 for i in range(10)]

    # Panel 2: convex hull outlines.
    sp2 = plt.subplot(312)
    for i in range(10):
        # Repeat the first vertex at the end so the outline is closed.
        ch = np.append(convex_hulls[i].vertices, convex_hulls[i].vertices[0])
        sp2.plot(points2D_c[i][ch, 0], points2D_c[i][ch, 1], '-',
                 label='$%i$' % i, color=cmap(vals[i]))
    # Line plots provide no mappable, so reuse the scatter's and name the
    # parent axes explicitly; otherwise the colorbar placement depends on
    # matplotlib's version-specific defaults.
    plt.colorbar(scatter, ax=sp2, ticks=range(10))
    sp2.set_title(projname + " Convex Hulls")

    # Panel 3: fitted ellipses (plot_results selects the subplot itself).
    plt.subplot(313)
    for i in range(10):
        class_points = points2D_c[i]
        plot_results(class_points, ellipses[i].predict(class_points),
                     ellipses[i].means_, ellipses[i].covariances_, 0,
                     projname + " fitting ellipses", cmap(vals[i]))

    plt.savefig(projname + ".png", dpi=100)
    plt.show()
# implementation #
# NOTE: ``y_pred`` (the data to embed) and ``y_test`` (its labels) are not
# defined in this file; they must exist before this section runs. The helper
# functions above assume ten classes labelled 0..9.
# Train T-SNE
tsne = TSNE(init='pca', perplexity=30, verbose=2)
x2d = tsne.fit_transform(y_pred)
# Visualisation #
# input 2d matrix and labels
convex_hulls = convexHulls(x2d, y_test)
ellipses = best_ellipses(x2d, y_test)
neighborhit = neighboring_hit(x2d, y_test)
visualization(x2d, y_test, convex_hulls, ellipses, 'filename', neighborhit)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment