Skip to content

Instantly share code, notes, and snippets.

@freedomtowin
Last active March 8, 2020 02:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save freedomtowin/a196b8026b96a9264fdbd45aa77bae12 to your computer and use it in GitHub Desktop.
Save freedomtowin/a196b8026b96a9264fdbd45aa77bae12 to your computer and use it in GitHub Desktop.
from sklearn import datasets,mixture
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
# Seed NumPy's global RNG; GaussianMixture falls back to it when no
# random_state is passed, so this makes the fit reproducible.
np.random.seed(1000)


def _ellipse_points(mean, cov, n_std, n_segments=16):
    """Return a closed outline of the n_std-sigma ellipse of a 2-D Gaussian.

    Args:
        mean: Length-2 array with the centre of the ellipse.
        cov: 2x2 symmetric covariance matrix.
        n_std: Number of standard deviations the outline lies from the mean.
        n_segments: Number of straight segments used to approximate the curve.

    Returns:
        Array of shape (n_segments + 1, 2); the last point closes the loop.
    """
    # eigh is the routine for symmetric matrices: real eigenvalues and
    # orthonormal eigenvectors, with no spurious complex parts.
    eig_vals, eig_vecs = np.linalg.eigh(cov)
    # Guard against tiny negative eigenvalues caused by round-off.
    axis_std = np.sqrt(np.clip(eig_vals, 0, None))
    # Use the SAME angle for x and y so the points lie on the unit circle.
    angles = np.linspace(0, 2 * np.pi, n_segments + 1)
    circle = np.column_stack([np.cos(angles), np.sin(angles)])
    # Row vectors: scale along the principal axes, then rotate into data space.
    transform = (np.diag(axis_std) * n_std).dot(eig_vecs.T)
    return circle.dot(transform) + mean


iris = datasets.load_iris()  # load once; both data and feature names come from it
y = iris.data
names = iris.feature_names
num_clusters = 3
gmm = mixture.GaussianMixture(
    n_components=num_clusters, max_iter=100000, tol=0.00001
).fit(y)

# Every unordered pair of feature indices (i < j); one subplot per pair.
dim_combos = [(i, j) for i in range(y.shape[1]) for j in range(i + 1, y.shape[1])]

# Posterior responsibility of each component for each sample (public API).
resp = gmm.predict_proba(y)
# Samples that no single component claims with probability >= 0.9.
gmm_mask = np.all(resp < 0.9, axis=1)

col = ['green', 'red', 'indigo']

plt.figure(figsize=(10, 15))
for count, (i, j) in enumerate(dim_combos, start=1):
    plt.subplot(len(dim_combos), 1, count)
    for k in range(num_clusters):
        # Marginal mean and covariance of component k in the (i, j) plane.
        mean_ij = gmm.means_[k, [i, j]]
        cov_ij = gmm.covariances_[k][np.ix_([i, j], [i, j])]
        # Draw the 1-sigma and 2-sigma outlines in the component's colour.
        for n_std in (1, 2):
            outline = _ellipse_points(mean_ij, cov_ij, n_std)
            # Wrap around the palette if there are more clusters than colours.
            plt.plot(outline[:, 0], outline[:, 1], color=col[k % len(col)])
    plt.plot(y[:, i], y[:, j], '.', color='black', markersize=8)
    # Highlight the ambiguous (low-responsibility) samples.
    plt.scatter(
        y[gmm_mask, i],
        y[gmm_mask, j],
        c=np.linspace(0, 1, np.sum(gmm_mask)),
        cmap='rainbow',
        s=100,
        alpha=0.8,
    )
    plt.xlabel(names[i])
    plt.ylabel(names[j])
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment