This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
simulate <- function(nsim,nvec){ | |
simdx <- c() | |
for(i in 1:length(nvec)) | |
simdx <- c(simdx,rep(1:nsim,each=nvec[i])+(i-1)*nsim) | |
dt <- data.table(sim=simdx) | |
bigN <- nrow(dt) | |
dt$n <- rep(rep(nvec,nvec),each=nsim) | |
dt$one <- 1 | |
dt$simc <- dt[,cumsum(one),by=sim]$V1 | |
dt$one <- NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pull in the independent component analysis estimator.
from sklearn.decomposition import FastICA

# Build an ICA model that recovers 3 independent components.
ica = FastICA(n_components=3)
# Fit the model to the dataset X and return the estimated sources.
ica_result = ica.fit_transform(X)
# Inspect the result shape: (n_samples, 3).
ica_result.shape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the Gaussian mixture model estimator.
from sklearn.mixture import GaussianMixture

# Create an instance with 3 mixture components.
# BUG FIX: the original chained .fit(X) onto the constructor and then
# called gmm.fit(X) again on the next line, fitting the model twice.
gmm = GaussianMixture(n_components=3)
# Fit the dataset exactly once (fit returns the fitted estimator).
gmm = gmm.fit(X)
# Predict the clustering labels for the dataset
# (completes the TODO that had no accompanying code).
labels = gmm.predict(X)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import datasets, cluster

# Load the first 10 rows of the iris feature matrix as a small demo set.
X = datasets.load_iris().data[:10]

# Configure hierarchical (agglomerative) clustering.
# 'ward' linkage is the default, but 'complete' and 'average' work too.
# BUG FIX: the class is AgglomerativeClustering; the original spelled it
# "AgglomerateClustering", which raises AttributeError.
clust = cluster.AgglomerativeClustering(n_clusters=3, linkage='ward')
# Fit the model and return one cluster label per row of X.
labels = clust.fit_predict(X)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.cluster import KMeans

# Build a k-means model with k clusters (k is defined upstream)
# and fit it to the input data X in a single step — fit() returns
# the fitted estimator, so chaining is equivalent to the two-line form.
kmeans = KMeans(n_clusters=k).fit(X)
# Cluster assignment for every row of X.
labels = kmeans.predict(X)
# Learned centroid coordinates, one row per cluster.
centroids = kmeans.cluster_centers_
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from scipy import stats | |
from sklearn import preprocessing | |
from sklearn.ensemble import ExtraTreesClassifier | |
def variable_selection(df, target, variance_thres, pbs_thres, chi_sqr_thres, feat_imp_thres): | |
def normalize(df): | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt

# Load the training data (hard-coded local Windows path; adjust as needed).
data = pd.read_csv("D://Blogs//train.csv")

# BUG FIX: X and y were passed to model.fit() below but never defined,
# which raised a NameError. Assume the last column is the target —
# TODO(review): confirm the actual target column against the CSV schema.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

model = ExtraTreesClassifier()
model.fit(X, y)
# Use the inbuilt feature_importances_ attribute of tree-based classifiers.
print(model.feature_importances_)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load libraries | |
from sklearn import datasets | |
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis | |
# Create a function to decide number of components | |
def select_n_components(var_ratio, goal_var: float) -> int: | |
# Set initial variance explained so far | |
total_variance = 0.0 | |
# Set initial number of features |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
from itertools import count | |
import matplotlib | |
matplotlib.use('TkAgg') | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import collections | |
from matplotlib import style |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
from matplotlib import style | |
style.use('ggplot') | |
import numpy as np | |
colors = 10 * ["g", "r", "c", "b", "k"] | |
class K_Means: |
NewerOlder