Skip to content

Instantly share code, notes, and snippets.

# Apply t-SNE from manifold learning for better visualization.
tsne = TSNE(random_state=21)
# Use fit_transform instead of fit, as TSNE has no transform method.
trainX_tsne = tsne.fit_transform(trainX)
plt.figure(figsize=(10, 10))
plt.xlim(trainX_tsne[:, 0].min(), trainX_tsne[:, 0].max())
plt.ylim(trainX_tsne[:, 1].min(), trainX_tsne[:, 1].max())
for i in range(len(trainX_tsne)):
    # Plot each sample as its class label text instead of using scatter.
    # Bug fix: the original plt.text call was truncated mid-argument-list
    # (syntax error); completed with a plain-text marker. NOTE(review):
    # the lost arguments may have included a per-class colour — confirm
    # against the original gist if available.
    plt.text(trainX_tsne[i, 0], trainX_tsne[i, 1], str(trainY[i]),
             fontdict={'weight': 'bold', 'size': 9})
# Project onto the first two linear discriminant components for plotting.
lda = LinearDiscriminantAnalysis(n_components=2)
lda.fit(trainX, trainY)
# Transform the data onto the first two discriminant components
# (the original comment said "principal components" — LDA axes are
# discriminants, not principal components).
trainX_lda = lda.transform(trainX)
colors = ["#476A2A", "#7851B8", "#BD3430", "#4A2D4E", "#875525",
          "#A83683", "#4E655E"]
plt.figure(figsize=(10, 10))
plt.xlim(trainX_lda[:, 0].min(), trainX_lda[:, 0].max())
plt.ylim(trainX_lda[:, 1].min(), trainX_lda[:, 1].max())
for i in range(len(trainX_lda)):
    # Bug fix: the loop body was missing entirely (syntax error).
    # Plot each sample as its class label, coloured per class; assumes
    # trainY holds integer-encoded labels in range(len(colors)) — TODO
    # confirm against preprocessdata's LabelEncoder output.
    plt.text(trainX_lda[i, 0], trainX_lda[i, 1], str(trainY[i]),
             color=colors[trainY[i]], fontdict={'weight': 'bold', 'size': 9})
# Reduce the feature space to 5 discriminant components, then score a
# random forest on the reduced train/test split.
lda = LinearDiscriminantAnalysis(n_components=5)
# fit_transform(X, y) fits the discriminants and projects trainX in one call.
trainX_lda = lda.fit_transform(trainX, trainY)
testX_lda = lda.transform(testX)
applyrandomforest(trainX_lda, testX_lda, trainY, testY)
# Project onto the first 12 principal components, then score a random
# forest on the reduced train/test split.
pca = PCA(n_components=12)
# Bug fix: was pca.fit(Xtrain) — 'Xtrain' is undefined anywhere in this
# script (NameError at runtime); every other line uses 'trainX'.
pca.fit(trainX)
trainX_pca = pca.transform(trainX)
testX_pca = pca.transform(testX)
applyrandomforest(trainX_pca, testX_pca, trainY, testY)
# Fit PCA with all components retained so the cumulative explained-variance
# curve shows how many components are worth keeping.
pca = PCA()
pca.fit(trainX)
# Running total of variance ratio across components.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
plt.figure()
plt.plot(cumulative_variance)
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Segmentation Dataset Explained Variance')
plt.show(block=True)
def applyrandomforest(trainX, testX, trainY, testY):
    """
    Apply Random forest on input dataset and print timing plus a
    classification report.

    :param trainX: training feature matrix
    :param testX: test feature matrix
    :param trainY: training labels
    :param testY: test labels (ground truth for the report)
    """
    start = time.process_time()
    # 700 trees, sqrt(n_features) candidates per split, depth capped at 15.
    forest = RandomForestClassifier(n_estimators=700, max_features='sqrt', max_depth=15)
    forest.fit(trainX, trainY)
    print("Time Elapsed: %s secs" % (time.process_time() - start))
    prediction = forest.predict(testX)
    print("Classification Report after applying Random Forest: ")
    # Bug fix: the report header was printed but the report itself never was;
    # classification_report is imported at file level and was unused.
    print(classification_report(testY, prediction))
def preprocessdata(data):
    """
    Preprocess the data with StandardScaler and LabelEncoder.

    :param data: input dataframe of training or test set; must contain a
        'LABELS' column holding the class names.
    """
    # Split the target column off from the feature columns.
    labels = data['LABELS']
    features = data.drop(['LABELS'], axis=1)
    # Keep the feature column names (presumably to rebuild a DataFrame after
    # scaling strips them — TODO confirm against the rest of the function).
    columns = features.columns
    # Fit an integer encoding of the string class labels.
    enc = LabelEncoder()
    enc.fit(labels)
    # NOTE(review): this view of the function is truncated here — the scaling
    # step and the return statement are not visible in this chunk.
LABELS REGION-CENTROID-COL REGION-CENTROID-ROW REGION-PIXEL-COUNT SHORT-LINE-DENSITY-5 SHORT-LINE-DENSITY-2 VEDGE-MEAN VEDGE-SD HEDGE-MEAN HEDGE-SD INTENSITY-MEAN RAWRED-MEAN RAWBLUE-MEAN RAWGREEN-MEAN EXRED-MEAN EXBLUE-MEAN EXGREEN-MEAN VALUE-MEAN SATURATION-MEAN HUE-MEAN
0 WINDOW 86.0 155.0 9 0.0 0.0 4.277778 10.551853 5.388889 60.240738 8.740741 6.666666 12.888889 6.666666 -6.222222 12.444445 -6.222222 12.888889 0.392385 -2.079296
1 SKY 120.0 74.0 9 0.0 0.0 0.333335 0.088889 0.500001 0.077778 101.851850 89.111115 123.222220 93.222220 -38.222220 64.111115 -25.888890 123.222220 0.276784 -2.220553
2 PATH 137.0 163.0 9 0.0 0.0
import warnings
import os
import time
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis