Skip to content

Instantly share code, notes, and snippets.

View agastidukare's full-sized avatar

Agasti Kishor Dukare agastidukare

View GitHub Profile
@agastidukare
agastidukare / gaussian.py
Created December 31, 2019 23:15
This function computes the probability density of a multivariate Gaussian distribution at each data point.
def multivariateGaussian(X, mu, sigma):
    """Evaluate a multivariate Gaussian density at each row of ``X``.

    Parameters
    ----------
    X : array-like, shape (n_samples, k) or (k,)
        Points at which the density is evaluated. A single 1-D point is
        treated as one sample.
    mu : array-like with k elements
        Mean of the distribution.
    sigma : array-like, shape (k,) or (k, k)
        Either the per-feature variances (expanded into a diagonal
        covariance matrix) or a full covariance matrix.

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        Density value for every sample.

    Raises
    ------
    ValueError
        If ``sigma`` is neither a length-k vector nor a (k, k) matrix.
    """
    X = np.atleast_2d(np.asarray(X, dtype=float))
    mu = np.asarray(mu, dtype=float).reshape(-1)
    sigma = np.asarray(sigma, dtype=float)
    k = mu.size

    # A 1-D sigma holds variances; a 2-D sigma is already a covariance matrix.
    if sigma.ndim == 1:
        covariance = np.diag(sigma)
    elif sigma.ndim == 2:
        covariance = sigma
    else:
        raise ValueError("sigma must be a 1-D variance vector or a 2-D covariance matrix")
    if covariance.shape != (k, k):
        raise ValueError(
            f"sigma has shape {sigma.shape}, which does not match a mean of length {k}"
        )

    centered = X - mu
    normalizer = (2 * np.pi) ** (k / 2) * (np.linalg.det(covariance) ** 0.5)
    # Row-wise quadratic form (x - mu)^T Sigma^+ (x - mu); the pseudo-inverse
    # keeps this defined even when the covariance is singular.
    quadratic_form = np.sum(centered @ np.linalg.pinv(covariance) * centered, axis=1)
    return 1 / normalizer * np.exp(-0.5 * quadratic_form)
# Density of every row of X under the Gaussian described by mu and sigma.
# NOTE(review): X, mu and sigma are not defined in this snippet; presumably
# mu and sigma come from estimateGaussian(X) -- confirm against the caller.
p = multivariateGaussian(X, mu, sigma)
@agastidukare
agastidukare / mean_variance.py
Last active December 31, 2019 23:48
This function estimates the per-feature mean and variance of a data set, i.e. the parameters of a Gaussian distribution fitted to it.
def estimateGaussian(X):
    """Estimate the per-feature mean and variance of ``X``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data set with one sample per row. Plain nested sequences are
        accepted and converted to a NumPy array.

    Returns
    -------
    mu : numpy.ndarray
        Mean taken along axis 0.
    var : numpy.ndarray
        Variance taken along axis 0 with ``numpy.var`` defaults
        (population variance, i.e. divided by the number of samples).

    Notes
    -----
    The estimates are also printed to standard output.
    """
    X = np.asarray(X)
    m = X.shape[0]
    # Mean: column sums divided by the number of samples.
    mu = np.sum(X, axis=0) / m
    # Variance of every column.
    var = np.var(X, axis=0)
    print(mu, var)
    return mu, var
@agastidukare
agastidukare / import.py
Last active January 1, 2020 00:03
Import necessary modules and create a data-set
import numpy as np
import matplotlib.pyplot as plt
# make_blobs is imported from the public sklearn.datasets namespace; the
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs

# 500 tightly clustered samples around a single center.
X, y_true = make_blobs(n_samples=500, centers=1, cluster_std=0.60, random_state=5)
# 20 widely spread samples (cluster_std=5) generated with the same seed.
X_append, y_true_append = make_blobs(n_samples=20, centers=1, cluster_std=5, random_state=5)

# Stack both groups into one data set; the appended samples get label 1.
X = np.vstack([X, X_append])
y_true = np.hstack([y_true, [1 for _ in y_true_append]])

# Reverse the column order of the feature matrix.
X = X[:, ::-1]
plt.scatter(X[:, 0], X[:, 1], marker="x");
@agastidukare
agastidukare / plot_prob.py
Created January 1, 2020 19:30
This code plots the data points, colored by their estimated probability density.
# Scatter every sample as an "x", colored by its value in p.
plt.figure(figsize=(8, 6))
plt.scatter(
    x=X[:, 0],
    y=X[:, 1],
    c=p,
    cmap='viridis',
    marker="x",
);
# Color scale for the values in p.
plt.colorbar();
@agastidukare
agastidukare / mark_outliers.py
Created January 1, 2020 20:06
This code circles the outliers, i.e. the points whose probability density falls below the threshold value epsilon.
# Base layer: all samples, colored by their value in p.
plt.figure(figsize=(8, 6))
plt.scatter(x=X[:, 0], y=X[:, 1], c=p, cmap='viridis', marker="x");

# Row indices whose value in p lies below the threshold epsilon.
# NOTE(review): epsilon is not defined in this snippet -- it must be set by the caller.
outliers = np.nonzero(p < epsilon)[0]

# Overlay hollow red circles on the selected rows.
plt.scatter(
    X[outliers, 0],
    X[outliers, 1],
    s=70,
    marker="o",
    facecolor="none",
    edgecolor="r",
);
@agastidukare
agastidukare / mixture_data.py
Created January 3, 2020 18:51
This code will generate data with 5 clusters.
# make_blobs is imported from the public sklearn.datasets namespace; the
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# 400 tightly clustered samples spread over five centers.
X, y_true = make_blobs(n_samples=400, centers=5, cluster_std=0.60, random_state=1)
# 50 widely spread samples (cluster_std=5) generated with the same seed.
X_append, y_true_append = make_blobs(n_samples=50, centers=5, cluster_std=5, random_state=1)

X = np.vstack([X, X_append])
# The cluster ids are discarded: the first group is labelled 0, the appended group 1.
y_true = np.hstack([[0 for _ in y_true], [1 for _ in y_true_append]])
X = X[:, ::-1]  # flip axes for better plotting

# Shuffled split with 33% of the rows held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_true, test_size=0.33, random_state=1, shuffle=True
)
plt.scatter(X_train[:, 0], X_train[:, 1], marker="x");
@agastidukare
agastidukare / plot_mixture.py
Created January 3, 2020 19:18
This code fits a single Gaussian to the training data, plots the resulting probability density of the test points, and circles the low-density outliers.
# Fit on the training split, then score the held-out split.
mu, sigma = estimateGaussian(X_train)
p = multivariateGaussian(X_test, mu, sigma)

# Test samples colored by their density value.
plt.figure(figsize=(8, 6))
plt.scatter(x=X_test[:, 0], y=X_test[:, 1], c=p, cmap='viridis', marker="x");

# Test rows whose density is below the fixed cut-off of 0.001 get a red circle.
outliers = np.nonzero(p < 0.001)[0]
plt.scatter(
    X_test[outliers, 0],
    X_test[outliers, 1],
    s=70,
    marker="o",
    facecolor="none",
    edgecolor="r",
);
@agastidukare
agastidukare / gaussian_mixture.py
Created January 4, 2020 21:01
This code imports scikit-learn's Gaussian mixture model and fits it to the training data.
from sklearn.mixture import GaussianMixture

# Five-component mixture with covariance_type='full' and a fixed seed,
# fitted to the training split.
gm = GaussianMixture(
    n_components=5,
    covariance_type='full',
    random_state=0,
)
gm.fit(X_train)
@agastidukare
agastidukare / mixture_pred_prob.py
Created January 4, 2020 21:06
This code plots, for each of the five mixture components, the probability that each training point belongs to that component.
# Per-sample membership probabilities, computed once instead of on every
# loop iteration; one column per mixture component.
responsibilities = gm.predict_proba(X_train)
n_components = responsibilities.shape[1]

# Two-column grid with enough rows for every component (3 x 2 for five).
n_cols = 2
n_rows = (n_components + n_cols - 1) // n_cols

plt.figure(figsize=(10, 10))
for i in range(n_components):
    # One panel per component, colored by the probability of belonging to it.
    plt.subplot(n_rows, n_cols, i + 1)
    plt.scatter(
        X_train[:, 0],
        X_train[:, 1],
        c=responsibilities[:, i],
        cmap='viridis',
        marker='x',
    )
import re
import nltk
from nltk.corpus import stopwords

# Fetch the stop-word corpus before it is first read below.
nltk.download("stopwords")

# English stop words, held in a set.
stop_words = set(stopwords.words("english"))