Skip to content

Instantly share code, notes, and snippets.

# Apply t-SNE from manifold learning for better visualization.
tsne = TSNE(random_state=21)
# Use fit_transform instead of fit, as TSNE has no transform method.
trainX_tsne = tsne.fit_transform(trainX)
plt.figure(figsize=(10, 10))
plt.xlim(trainX_tsne[:, 0].min(), trainX_tsne[:, 0].max())
plt.ylim(trainX_tsne[:, 1].min(), trainX_tsne[:, 1].max())
for i in range(len(trainX_tsne)):
    # Plot each sample as its class label text instead of using scatter.
    # Bug fix: the original plt.text call was truncated mid-argument-list
    # (syntax error); completed with a plain-text marker. NOTE(review):
    # the lost arguments may have included a per-class colour — confirm
    # against the original gist if available.
    plt.text(trainX_tsne[i, 0], trainX_tsne[i, 1], str(trainY[i]),
             fontdict={'weight': 'bold', 'size': 9})
# Project onto the first two linear discriminant components for plotting.
lda = LinearDiscriminantAnalysis(n_components=2)
lda.fit(trainX, trainY)
# Transform the data onto the first two discriminant components
# (the original comment said "principal components" — LDA axes are
# discriminants, not principal components).
trainX_lda = lda.transform(trainX)
colors = ["#476A2A", "#7851B8", "#BD3430", "#4A2D4E", "#875525",
          "#A83683", "#4E655E"]
plt.figure(figsize=(10, 10))
plt.xlim(trainX_lda[:, 0].min(), trainX_lda[:, 0].max())
plt.ylim(trainX_lda[:, 1].min(), trainX_lda[:, 1].max())
for i in range(len(trainX_lda)):
    # Bug fix: the loop body was missing entirely (syntax error).
    # Plot each sample as its class label, coloured per class; assumes
    # trainY holds integer-encoded labels in range(len(colors)) — TODO
    # confirm against preprocessdata's LabelEncoder output.
    plt.text(trainX_lda[i, 0], trainX_lda[i, 1], str(trainY[i]),
             color=colors[trainY[i]], fontdict={'weight': 'bold', 'size': 9})
# Reduce the feature space to 5 discriminant components, then score a
# random forest on the reduced train/test split.
lda = LinearDiscriminantAnalysis(n_components=5)
# fit_transform(X, y) fits the discriminants and projects trainX in one call.
trainX_lda = lda.fit_transform(trainX, trainY)
testX_lda = lda.transform(testX)
applyrandomforest(trainX_lda, testX_lda, trainY, testY)
# Project onto the first 12 principal components, then score a random
# forest on the reduced train/test split.
pca = PCA(n_components=12)
# Bug fix: was pca.fit(Xtrain) — 'Xtrain' is undefined anywhere in this
# script (NameError at runtime); every other line uses 'trainX'.
pca.fit(trainX)
trainX_pca = pca.transform(trainX)
testX_pca = pca.transform(testX)
applyrandomforest(trainX_pca, testX_pca, trainY, testY)
# Fit PCA with all components retained so the cumulative explained-variance
# curve shows how many components are worth keeping.
pca = PCA()
pca.fit(trainX)
# Running total of variance ratio across components.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
plt.figure()
plt.plot(cumulative_variance)
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Segmentation Dataset Explained Variance')
plt.show(block=True)
def applyrandomforest(trainX, testX, trainY, testY):
    """
    Apply Random forest on input dataset and print timing plus a
    classification report.

    :param trainX: training feature matrix
    :param testX: test feature matrix
    :param trainY: training labels
    :param testY: test labels (ground truth for the report)
    """
    start = time.process_time()
    # 700 trees, sqrt(n_features) candidates per split, depth capped at 15.
    forest = RandomForestClassifier(n_estimators=700, max_features='sqrt', max_depth=15)
    forest.fit(trainX, trainY)
    print("Time Elapsed: %s secs" % (time.process_time() - start))
    prediction = forest.predict(testX)
    print("Classification Report after applying Random Forest: ")
    # Bug fix: the report header was printed but the report itself never was;
    # classification_report is imported at file level and was unused.
    print(classification_report(testY, prediction))
def preprocessdata(data):
    """
    Preprocess the data with StandardScaler and LabelEncoder.

    :param data: input dataframe of training or test set; must contain a
        'LABELS' column holding the class names.
    """
    # Split the target column off from the feature columns.
    labels = data['LABELS']
    features = data.drop(['LABELS'], axis=1)
    # Keep the feature column names (presumably to rebuild a DataFrame after
    # scaling strips them — TODO confirm against the rest of the function).
    columns = features.columns
    # Fit an integer encoding of the string class labels.
    enc = LabelEncoder()
    enc.fit(labels)
    # NOTE(review): this view of the function is truncated here — the scaling
    # step and the return statement are not visible in this chunk.
LABELS REGION-CENTROID-COL REGION-CENTROID-ROW REGION-PIXEL-COUNT SHORT-LINE-DENSITY-5 SHORT-LINE-DENSITY-2 VEDGE-MEAN VEDGE-SD HEDGE-MEAN HEDGE-SD INTENSITY-MEAN RAWRED-MEAN RAWBLUE-MEAN RAWGREEN-MEAN EXRED-MEAN EXBLUE-MEAN EXGREEN-MEAN VALUE-MEAN SATURATION-MEAN HUE-MEAN
0 WINDOW 86.0 155.0 9 0.0 0.0 4.277778 10.551853 5.388889 60.240738 8.740741 6.666666 12.888889 6.666666 -6.222222 12.444445 -6.222222 12.888889 0.392385 -2.079296
1 SKY 120.0 74.0 9 0.0 0.0 0.333335 0.088889 0.500001 0.077778 101.851850 89.111115 123.222220 93.222220 -38.222220 64.111115 -25.888890 123.222220 0.276784 -2.220553
2 PATH 137.0 163.0 9 0.0 0.0
import warnings
import os
import time
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis