This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def multivariateGaussian(X, mu, sigma):
    """Evaluate a multivariate Gaussian density at every row of ``X``.

    Args:
        X: Samples, one per row, shape ``(m, k)``. A single 1-D sample of
            length ``k`` is also accepted and treated as one row.
        mu: Mean vector with ``k`` entries.
        sigma: Either a 1-D vector of ``k`` per-feature variances (used as
            the diagonal of the covariance matrix) or a full ``(k, k)``
            covariance matrix.

    Returns:
        1-D array with one density value per row of ``X``.
    """
    mu = np.asarray(mu, dtype=float).ravel()
    sigma = np.asarray(sigma, dtype=float)
    k = mu.size

    # Only a variance *vector* is expanded to a diagonal matrix. Calling
    # np.diag on a matrix would extract its diagonal and break det() below.
    if sigma.ndim == 1:
        sigma = np.diag(sigma)

    centered = np.atleast_2d(np.asarray(X, dtype=float)) - mu

    # Normalising constant: (2*pi)^(k/2) * |Sigma|^(1/2).
    norm = (2 * np.pi) ** (k / 2) * np.linalg.det(sigma) ** 0.5

    # Row-wise quadratic form (x - mu)^T Sigma^+ (x - mu); the pseudo-inverse
    # keeps this defined even when Sigma is singular.
    quad = np.sum(centered @ np.linalg.pinv(sigma) * centered, axis=1)

    return np.exp(-0.5 * quad) / norm
# Density of every sample in X under the Gaussian described by mu and sigma.
p = multivariateGaussian(X, mu, sigma)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def estimateGaussian(X):
    """Estimate per-feature mean and variance of a data set.

    Args:
        X: Array-like of shape ``(m, k)`` with one sample per row.

    Returns:
        Tuple ``(mu, var)`` of 1-D arrays of length ``k``: the column means
        and the column variances (population variance, i.e. divided by
        ``m``, which is NumPy's default ``ddof=0``).

    Side effects:
        Prints ``mu`` and ``var`` to standard output.
    """
    X = np.asarray(X, dtype=float)
    mu = X.mean(axis=0)
    var = X.var(axis=0)
    print(mu, var)
    return mu, var
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import matplotlib.pyplot as plt
# make_blobs is imported from sklearn.datasets: the old
# sklearn.datasets.samples_generator module no longer exists in current
# scikit-learn releases.
from sklearn.datasets import make_blobs

# One tight blob of 500 points (std 0.60) ...
X, y_true = make_blobs(n_samples=500, centers=1, cluster_std=0.60,
                       random_state=5)
# ... plus 20 widely spread points (std 5) that act as the anomalies.
X_append, y_true_append = make_blobs(n_samples=20, centers=1, cluster_std=5,
                                     random_state=5)
X = np.vstack([X, X_append])
# The appended points are labelled 1; the first blob keeps the labels
# returned by make_blobs.
y_true = np.hstack([y_true, [1 for _ in y_true_append]])
X = X[:, ::-1]  # swap the two feature columns
plt.scatter(X[:, 0], X[:, 1], marker="x")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scatter the samples coloured by their estimated density p.
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], marker="x", c=p, cmap='viridis')
plt.colorbar()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scatter the samples coloured by their estimated density p.
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], marker="x", c=p, cmap='viridis')
# Circle the anomalies: samples whose density falls below the threshold
# epsilon (epsilon must already be defined by an earlier cell).
outliers = np.nonzero(p < epsilon)[0]
plt.scatter(X[outliers, 0], X[outliers, 1], marker="o", facecolor="none",
            edgecolor="r", s=70)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# make_blobs is imported from sklearn.datasets: the old
# sklearn.datasets.samples_generator module no longer exists in current
# scikit-learn releases.
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# 400 points in five tight blobs (std 0.60) ...
X, y_true = make_blobs(n_samples=400, centers=5, cluster_std=0.60,
                       random_state=1)
# ... plus 50 widely spread points (std 5) that act as the anomalies.
X_append, y_true_append = make_blobs(n_samples=50, centers=5, cluster_std=5,
                                     random_state=1)
X = np.vstack([X, X_append])
# Binary ground truth: 0 for the tight blobs, 1 for the appended points.
y_true = np.hstack([[0 for _ in y_true], [1 for _ in y_true_append]])
X = X[:, ::-1]  # flip axes for better plotting
X_train, X_test, y_train, y_test = train_test_split(
    X, y_true, test_size=0.33, random_state=1, shuffle=True)
plt.scatter(X_train[:, 0], X_train[:, 1], marker="x")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit the Gaussian on the training split, then score the held-out split.
mu, sigma = estimateGaussian(X_train)
p = multivariateGaussian(X_test, mu, sigma)
plt.figure(figsize=(8, 6))
plt.scatter(X_test[:, 0], X_test[:, 1], marker="x", c=p, cmap='viridis')
# Circle test samples whose density is below the fixed threshold 0.001.
outliers = np.nonzero(p < 0.001)[0]
plt.scatter(X_test[outliers, 0], X_test[outliers, 1], marker="o",
            facecolor="none", edgecolor="r", s=70)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.mixture import GaussianMixture

# Five-component mixture with a full covariance matrix per component,
# fitted on the training split; random_state fixes the initialisation.
gm = GaussianMixture(n_components=5, covariance_type='full', random_state=0)
gm.fit(X_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-component membership probabilities for every training sample.
# Computed once: the result does not depend on the loop index.
component_proba = gm.predict_proba(X_train)

plt.figure(figsize=(10, 10))
for i in range(5):
    # One panel per mixture component on a 3x2 grid, coloured by the
    # probability that each sample belongs to component i.
    plt.subplot(3, 2, i + 1)
    plt.scatter(X_train[:, 0], X_train[:, 1], c=component_proba[:, i],
                cmap='viridis', marker='x')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import nltk

# Make sure the NLTK stop-word corpus is available before it is read below.
nltk.download('stopwords')
from nltk.corpus import stopwords

# Held as a set for membership tests.
stop_words = set(stopwords.words('english'))
OlderNewer