spikar / kmeans.py (created February 21, 2019 12:23): k-means implementation in Python from scratch
class K_Means:
    def __init__(self, k=2, tol=0.001, max_iter=300):
        self.k = k                # number of clusters
        self.tol = tol            # convergence tolerance on centroid movement
        self.max_iter = max_iter  # cap on optimization iterations

    def fit(self, data):
        self.centroids = {}
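The gist preview cuts off right after the centroid dictionary is initialized. A minimal sketch of how such a fit method is typically completed, assuming data is a NumPy array of shape (n_samples, n_features); the seeding and convergence test here are illustrative, not necessarily the author's exact code:

import numpy as np

class KMeansSketch(K_Means):
    def fit(self, data):
        self.centroids = {i: data[i] for i in range(self.k)}  # seed with first k points
        for _ in range(self.max_iter):
            clusters = {i: [] for i in range(self.k)}
            for point in data:
                # assign each point to the nearest centroid by Euclidean distance
                dists = [np.linalg.norm(point - self.centroids[c]) for c in range(self.k)]
                clusters[dists.index(min(dists))].append(point)
            prev = dict(self.centroids)
            for c, members in clusters.items():
                if members:  # keep the old centroid if a cluster empties out
                    self.centroids[c] = np.mean(members, axis=0)
            # converged once no centroid moved more than tol
            if all(np.linalg.norm(self.centroids[c] - prev[c]) < self.tol
                   for c in range(self.k)):
                break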
spikar / kmeans_update.py (last active October 1, 2020 16:12): k-means Python code with update functionality
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np

style.use('ggplot')
colors = 10 * ["g", "r", "c", "b", "k"]  # five base colors repeated, enough for up to 50 clusters

class K_Means:
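This preview also stops at the class definition. A plausible plotting step using the colors list, assuming the class exposes centroids and classifications dicts like the from-scratch gist above (an assumption, since the body is cut off; classifications is a hypothetical attribute name):

clf = K_Means(k=3)
clf.fit(X)  # X: assumed to be a (n_samples, 2) NumPy array

for i, centroid in clf.centroids.items():
    plt.scatter(*centroid, marker="o", color="k", s=150)
for i, points in clf.classifications.items():  # hypothetical attribute
    for p in points:
        plt.scatter(*p, marker="x", color=colors[i])
plt.show()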
from __future__ import division

import collections
from itertools import count

import matplotlib
matplotlib.use('TkAgg')  # select the Tk backend before importing pyplot
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import pandas as pd
# Load libraries
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Create a function to decide number of components
def select_n_components(var_ratio, goal_var: float) -> int:
    # Set initial variance explained so far
    total_variance = 0.0
    # Set initial number of components
    n_components = 0
    # Accumulate explained variance until the goal is met
    for explained_variance in var_ratio:
        total_variance += explained_variance
        n_components += 1
        if total_variance >= goal_var:
            break
    return n_components
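A short usage sketch, assuming the iris dataset and a 95% variance goal (both illustrative choices):

iris = datasets.load_iris()
lda = LinearDiscriminantAnalysis(n_components=None)
lda.fit(iris.data, iris.target)
# explained_variance_ratio_ holds the variance fraction per discriminant
print(select_n_components(lda.explained_variance_ratio_, 0.95))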
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt

data = pd.read_csv("D://Blogs//train.csv")
X = data.iloc[:, :-1]  # assumes the label sits in the last column
y = data.iloc[:, -1]
model = ExtraTreesClassifier()
model.fit(X, y)
print(model.feature_importances_)  # built-in feature_importances_ of tree-based classifiers
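A common follow-up is to rank and plot the importances; a sketch (the choice of top 10 is arbitrary):

feat_importances = pd.Series(model.feature_importances_, index=X.columns)
feat_importances.nlargest(10).plot(kind='barh')  # ten most important features
plt.show()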
import pandas as pd
import numpy as np
from scipy import stats
from sklearn import preprocessing
from sklearn.ensemble import ExtraTreesClassifier

def variable_selection(df, target, variance_thres, pbs_thres, chi_sqr_thres, feat_imp_thres):
    # Filter features by variance, point-biserial, chi-square, and tree-importance thresholds
    def normalize(df):
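The preview ends at the nested helper. A minimal sketch of what such a normalize step often looks like with sklearn.preprocessing (an assumption; the gist body is cut off):

def normalize(df):
    # min-max scale every column into [0, 1]
    scaler = preprocessing.MinMaxScaler()
    return pd.DataFrame(scaler.fit_transform(df), columns=df.columns)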
from sklearn.cluster import KMeans

# Number of clusters
k = 3  # illustrative value; the original snippet leaves k undefined
kmeans = KMeans(n_clusters=k)
# Fitting the input data
kmeans = kmeans.fit(X)
# Getting the cluster labels
labels = kmeans.predict(X)
# Centroid values
centroids = kmeans.cluster_centers_
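To make the snippet self-contained, X can come from synthetic blobs; a sketch:

from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=3, random_state=42)
kmeans = KMeans(n_clusters=3).fit(X)
print(kmeans.cluster_centers_)  # one centroid per cluster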
from sklearn import datasets, cluster

# Load dataset
X = datasets.load_iris().data[:10]
# Specify the parameters for clustering.
# 'ward' linkage is the default, but 'complete' and 'average' can be used too.
clust = cluster.AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = clust.fit_predict(X)
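For a visual check of the merge order, a hierarchical dendrogram can be drawn with SciPy; a sketch (SciPy is an added dependency here):

from scipy.cluster.hierarchy import dendrogram, ward
import matplotlib.pyplot as plt

linkage_matrix = ward(X)  # ward linkage on the same ten iris rows
dendrogram(linkage_matrix)
plt.show()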
# TODO: Import GaussianMixture
from sklearn.mixture import GaussianMixture

# TODO: Create an instance of Gaussian Mixture with 3 components
gmm = GaussianMixture(n_components=3)
# TODO: fit the dataset
gmm = gmm.fit(X)
# TODO: predict the clustering labels for the dataset
clustering = gmm.predict(X)
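Unlike k-means, a Gaussian mixture also yields soft assignments; a short sketch:

probs = gmm.predict_proba(X)  # per-sample membership probabilities, shape (n_samples, 3)
print(probs[:5].round(3))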
# TODO: Import FastICA
from sklearn.decomposition import FastICA

# TODO: Initialize FastICA with n_components=3
ica = FastICA(n_components=3)
# TODO: Run the FastICA algorithm using fit_transform on dataset X
ica_result = ica.fit_transform(X)
print(ica_result.shape)  # (n_samples, 3): one column per recovered component
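A quick end-to-end sketch of what ICA recovers, mixing three synthetic sources and unmixing them (all values illustrative):

import numpy as np

rng = np.random.RandomState(0)
t = np.linspace(0, 8, 2000)
S = np.c_[np.sin(2 * t), np.sign(np.sin(3 * t)), rng.laplace(size=t.shape)]
A = np.array([[1.0, 1.0, 1.0], [0.5, 2.0, 1.0], [1.5, 1.0, 2.0]])  # mixing matrix
X = S @ A.T  # observed mixed signals
recovered = FastICA(n_components=3, random_state=0).fit_transform(X)
print(recovered.shape)  # (2000, 3): independent components, up to scale and order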