library(data.table)

# Build a long data.table of simulation indices: nsim simulations for each
# sample size in nvec, with a within-simulation counter simc.
simulate <- function(nsim, nvec) {
  simdx <- c()
  for (i in 1:length(nvec))
    simdx <- c(simdx, rep(1:nsim, each = nvec[i]) + (i - 1) * nsim)
  dt <- data.table(sim = simdx)
  bigN <- nrow(dt)
  dt$n <- rep(rep(nvec, nvec), each = nsim)
  dt$one <- 1
  dt$simc <- dt[, cumsum(one), by = sim]$V1
  dt$one <- NULL
  dt
}
# Import FastICA
from sklearn.decomposition import FastICA
# Initialize FastICA to recover 3 independent components
ica = FastICA(n_components=3)
# Run the FastICA algorithm on dataset X with fit_transform, returning the separated sources
ica_result = ica.fit_transform(X)
ica_result.shape
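
X here is assumed to be a numeric 2-D array; a minimal, hypothetical way to build one is to mix a few synthetic source signals (the names and values below are illustrative, not from the original):

import numpy as np
rng = np.random.RandomState(0)
t = np.linspace(0, 8, 2000)
# Three sources: a sine wave, a square wave, and Gaussian noise
S = np.c_[np.sin(2 * t), np.sign(np.sin(3 * t)), rng.standard_normal(len(t))]
A = np.array([[1.0, 1.0, 1.0], [0.5, 2.0, 1.0], [1.5, 1.0, 2.0]])  # mixing matrix
X = S.dot(A.T)  # observed mixtures, shape (2000, 3)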
# Import GaussianMixture
from sklearn.mixture import GaussianMixture
# Create an instance of GaussianMixture with 3 components
gmm = GaussianMixture(n_components=3)
# Fit the dataset
gmm = gmm.fit(X)
# Predict the clustering labels for the dataset
labels = gmm.predict(X)
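
The fitted mixture also exposes its learned parameters directly, for instance the component means and mixing weights:

print(gmm.means_)    # one mean vector per component
print(gmm.weights_)  # mixing proportions, summing to 1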
from sklearn import datasets, cluster
# Load dataset (first 10 rows of iris keep the example small)
X = datasets.load_iris().data[:10]
# Specify the parameters for clustering.
# 'ward' linkage is the default, but 'complete' and 'average' can be used too.
clust = cluster.AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = clust.fit_predict(X)
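
To see how the linkage choice changes the grouping on the same points (an illustrative check, not in the original):

for linkage in ('ward', 'complete', 'average'):
    labels = cluster.AgglomerativeClustering(n_clusters=3, linkage=linkage).fit_predict(X)
    print(linkage, labels)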
from sklearn.cluster import KMeans
# Number of clusters
k = 3
kmeans = KMeans(n_clusters=k)
# Fitting the input data
kmeans = kmeans.fit(X)
# Getting the cluster labels
labels = kmeans.predict(X)
# Centroid values
centroids = kmeans.cluster_centers_
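
A quick self-contained run of the same flow, with make_blobs standing in for real data (toy data, illustrative only):

from sklearn.datasets import make_blobs
X, _ = make_blobs(n_samples=300, centers=3, random_state=42)
kmeans = KMeans(n_clusters=3).fit(X)
print(kmeans.cluster_centers_)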
import pandas as pd
import numpy as np
from scipy import stats
from sklearn import preprocessing
from sklearn.ensemble import ExtraTreesClassifier

# Select features by the thresholds its arguments name (variance, presumably
# point-biserial correlation, chi-squared, and tree-based feature importance);
# the function bodies are elided in this excerpt.
def variable_selection(df, target, variance_thres, pbs_thres, chi_sqr_thres, feat_imp_thres):
    ...

def normalize(df):
    ...
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt
data = pd.read_csv("D://Blogs//train.csv")
# Assumed split: treat the last column as the target (not shown in the original)
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
model = ExtraTreesClassifier()
model.fit(X, y)
print(model.feature_importances_)  # use the inbuilt feature_importances_ of tree-based classifiers
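
Since matplotlib is imported above, a natural follow-up (a sketch, not part of the original snippet) is to plot the largest importances:

feat_importances = pd.Series(model.feature_importances_, index=X.columns)
feat_importances.nlargest(10).plot(kind='barh')  # top 10 most important features
plt.show()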
# Load libraries
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Create a function to decide the number of components
def select_n_components(var_ratio, goal_var: float) -> int:
    # Set initial variance explained so far
    total_variance = 0.0
    # Set initial number of features
    n_components = 0
    # Add components until the explained-variance goal is reached
    for explained_variance in var_ratio:
        total_variance += explained_variance
        n_components += 1
        if total_variance >= goal_var:
            break
    return n_components
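
A hypothetical usage with the iris data available from the datasets import above (the 0.95 goal is arbitrary):

iris = datasets.load_iris()
lda = LinearDiscriminantAnalysis(n_components=None)
lda.fit(iris.data, iris.target)
print(select_n_components(lda.explained_variance_ratio_, 0.95))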
from __future__ import division
from itertools import count
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import collections
from matplotlib import style
spikar / kmeans_update.py: k-means Python code with update functionality
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import numpy as np
colors = 10 * ["g", "r", "c", "b", "k"]
class K_Means:
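    # The gist is truncated at the class header; below is a minimal sketch of a
    # standard k-means body (k, tol, and max_iter are assumed hyperparameter names).
    def __init__(self, k=2, tol=0.001, max_iter=300):
        self.k = k
        self.tol = tol          # stop when centroids move less than this percentage
        self.max_iter = max_iter

    def fit(self, data):
        # Seed centroids with the first k points
        self.centroids = {i: data[i] for i in range(self.k)}
        for _ in range(self.max_iter):
            self.classifications = {i: [] for i in range(self.k)}
            # Assignment step: each point joins its nearest centroid
            for featureset in data:
                distances = [np.linalg.norm(featureset - self.centroids[c])
                             for c in self.centroids]
                self.classifications[distances.index(min(distances))].append(featureset)
            prev_centroids = dict(self.centroids)
            # Update step: move each centroid to the mean of its assigned points
            for c in self.classifications:
                if self.classifications[c]:
                    self.centroids[c] = np.average(self.classifications[c], axis=0)
            # Converged when every centroid's percentage movement is within tol
            if all(np.sum(np.abs((self.centroids[c] - prev_centroids[c]) /
                                 (prev_centroids[c] + 1e-12) * 100.0)) <= self.tol
                   for c in self.centroids):
                break

    def predict(self, featureset):
        # Return the index of the nearest learned centroid
        distances = [np.linalg.norm(featureset - self.centroids[c]) for c in self.centroids]
        return distances.index(min(distances))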