SVM and SVC
Support vector machines are a set of supervised learning algorithms that you can use for classification, regression and outlier detection. SciKit-Learn has many classes for SVM usage, depending on your purpose; the one we'll focus on here is the Support Vector Classifier, SVC.
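Before the exercises below, here is a minimal, self-contained sketch of the basic SVC workflow, shown purely for orientation. It assumes scikit-learn's bundled digits dataset rather than the data files the exercises use:

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

digits = load_digits()                      # 8x8 digit images, flattened to 64 features
X_tr, X_te, y_tr, y_te = train_test_split(digits.data, digits.target, random_state=0)

clf = SVC(kernel='linear', C=1)             # a linear support vector classifier
clf.fit(X_tr, y_tr)                         # learn the maximum-margin separating hyperplanes
print(clf.score(X_te, y_te))                # mean accuracy on the held-out split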
"""
In 1982, the first computer-driven OCR machine was installed by the United
States Postal Service (USPS) in Los Angeles,
and by the end of 1984, over 250 OCR machines were installed in 118 major
mail processing centers across the country.
Let's see if it's possible to train a support vector classifier in a few seconds
using machine learning, and whether the classification accuracy is similar to or better
than the advertised USPS stats.
"""
import pandas as pd
# The Dataset comes from:
# https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
def load(path_test, path_train):
    # Load up the data.
    with open(path_test, 'r') as f: testing = pd.read_csv(f)
    with open(path_train, 'r') as f: training = pd.read_csv(f)

    # The number of samples between training and testing can vary
    # But the number of features better remain the same!
    n_features = testing.shape[1]

    X_test = testing.ix[:,:n_features-1]
    X_train = training.ix[:,:n_features-1]
    y_test = testing.ix[:,n_features-1:].values.ravel()
    y_train = training.ix[:,n_features-1:].values.ravel()

    return X_train, X_test, y_train, y_test
def peekData(X_train):
    # The 'targets' or labels are stored in y. The 'samples' or data is stored in X
    print ("Peeking your data...")
    fig = plt.figure()

    cnt = 0
    for col in range(5):
        for row in range(10):
            plt.subplot(5, 10, cnt + 1)
            plt.imshow(X_train.ix[cnt,:].reshape(8,8), cmap=plt.cm.gray_r, interpolation='nearest')
            plt.axis('off')
            cnt += 1

    fig.set_tight_layout(True)
    plt.show()
def drawPredictions(model, X_train, X_test, y_train, y_test):
    fig = plt.figure()

    # Make some guesses
    y_guess = model.predict(X_test)

    num_rows = 10
    num_cols = 5

    index = 0
    for col in range(num_cols):
        for row in range(num_rows):
            plt.subplot(num_cols, num_rows, index + 1)

            # 8x8 is the size of the image, 64 pixels
            plt.imshow(X_test.ix[index,:].reshape(8,8), cmap=plt.cm.gray_r, interpolation='nearest')

            # Green = Guessed right
            # Red = Fail!
            fontcolor = 'g' if y_test[index] == y_guess[index] else 'r'
            plt.title('Label: %i' % y_guess[index], fontsize=6, color=fontcolor)
            plt.axis('off')
            index += 1

    fig.set_tight_layout(True)
    plt.show()
#
# : Pass in the file paths to the .tes and the .tra files
X_train, X_test, y_train, y_test = load('Datasets/optdigits.tes', 'Datasets/optdigits.tra')
import matplotlib.pyplot as plt
from sklearn import svm
#
# Get to know the data.
peekData(X_train)
#
# : Create an SVC classifier.
print ("Training SVC Classifier...")
svc = svm.SVC(kernel='linear', C=1, gamma=0.001)
svc.fit(X_train, y_train)
# : Calculate the score of the SVC against the testing data
print ("Scoring SVC Classifier...")
#
score = svc.score(X_test, y_test)
print ("Score:\n", score)
# Visual Confirmation of accuracy
drawPredictions(svc, X_train, X_test, y_train, y_test)
#
# : Print out the TRUE value of the 1000th digit in the test set
# By TRUE value, we mean, the actual provided label for that sample
#
true_1000th_test_value = y_test[999]
print ("1000th test label: ", true_1000th_test_value)
#
# : Predict the value of the 1000th digit in the test set.
# Was the model's prediction correct?
#
guess_1000th_test_value = svc.predict(X_test[999:1000])
print ("1000th test prediction: ", guess_1000th_test_value)
#
# : Use IMSHOW to display the 1000th test image, so you can
# visually check if it was a hard image, or an easy image
#
plt.imshow(X_test.ix[999,:].reshape(8,8), cmap=plt.cm.gray_r, interpolation='nearest')
#
# USPS has an advertised accuracy score
# of 98%. We can beat it: try a POLY kernel.
svc = svm.SVC(kernel='poly', C=1, gamma=0.001)
svc.fit(X_train, y_train)
# : Calculate the score of the SVC against the testing data
print ("Scoring SVC poly Classifier...")
score = svc.score(X_test, y_test)
print ("Score:\n", score)
#
# change SVC's kernel to 'rbf'
svc = svm.SVC(kernel='rbf', C=1, gamma=0.001)
svc.fit(X_train, y_train)
# : Calculate the score of SVC against the testing data
print ("Scoring SVC rbf Classifier...")
score = svc.score(X_test, y_test)
print ("Score:\n", score)
"""
Next: check out other handwritten digit datasets,
such as The MNIST Database of handwritten digits, and Handwritten Digit
Recognition, to see how well the classifier performs on them.
Source code to load MNIST-formatted data, such as that from the above
two links, is below:
"""
def load(path_img, path_lbl):
    import numpy as np
    from array import array
    import struct

    with open(path_lbl, 'rb') as file:
        magic, size = struct.unpack(">II", file.read(8))
        if magic != 2049:
            raise ValueError('Magic number mismatch, expected 2049, got {0}'.format(magic))
        labels = array("B", file.read())

    with open(path_img, 'rb') as file:
        magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
        if magic != 2051:
            raise ValueError('Magic number mismatch, expected 2051, got {0}'.format(magic))
        image_data = array("B", file.read())

    images = []
    for i in range(size):
        images.append([0] * rows * cols)

    # You can set divisor to any int, e.g. 1, 2, 3. If you set it to 1,
    # there will be no resampling of the image. If you set it to two or higher,
    # the image will be resampled by that factor of pixels. This, in turn,
    # speeds up training but may reduce overall accuracy.
    # (A small illustration of this slicing trick follows after the loader.)
    divisor = 1
    for i in range(size):
        images[i] = np.array(image_data[i * rows * cols:(i + 1) * rows * cols]).reshape(28,28)[::divisor,::divisor].reshape(-1)

    return pd.DataFrame(images), pd.Series(labels)
#X, y = load('digits.data', 'digits.labels')
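# A quick illustration of the divisor trick used in the loader above, on a
# hypothetical toy array (not part of the loader itself): slicing with a step
# keeps every divisor-th row and column, e.g. a step of 2 shrinks 28x28 to 14x14.
import numpy as np
demo = np.arange(16).reshape(4, 4)
print(demo[::2, ::2].shape)   # (2, 2) -- every other row and column kept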
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 12 21:55:24 2017
@author: Massimo
Apply SVC to the Parkinson's Data Set, provided courtesy of UCI's Machine Learning
Repository. The dataset was created at the University of Oxford, in collaboration
with 10 medical centers around the US, along with Intel, who developed the device
used to record the primary features of the dataset: speech signals.
https://archive.ics.uci.edu/ml/datasets/Parkinsons
Goals: first, to see if it's possible to differentiate between people who have
Parkinson's and people who don't using SciKit-Learn's support vector classifier;
and then to take a first stab at a naive way of fine-tuning the parameters in
an attempt to maximize the accuracy on the testing set.
"""
import pandas as pd
X = pd.read_csv("Datasets/parkinsons.data")
X.drop(['name'], axis=1, inplace=True) # drop name column
y = X.status.copy() # copy “y” values out from status
X.drop(['status'], axis=1, inplace=True) # drop status column
# Perform a train/test split. 30% test group size, with a random_state equal to 7.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
random_state=7)
from sklearn import preprocessing
# Tried different scalers; StandardScaler gives the best score here:
scaler = preprocessing.StandardScaler() # 0.932203389831
#scaler = preprocessing.MinMaxScaler() # 0.881355932203
#scaler = preprocessing.MaxAbsScaler() # 0.881355932203
#scaler = preprocessing.Normalizer() # 0.796610169492
#scaler = preprocessing.KernelCenterer() # 0.915254237288
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.decomposition import PCA
from sklearn import manifold
usePCA = False # change this to use PCA as dimensionality reducer
if usePCA:
    reducer = PCA(n_components=7).fit(X_train)
else:
    reducer = manifold.Isomap(n_neighbors=3, n_components=6).fit(X_train)

X_train = reducer.transform(X_train)
X_test = reducer.transform(X_test)
# Score: 0.949152542373 with C= 0.75 gamma = 0.047 n=2,comp=6
# Create a SVC classifier.
# Fit it against the training data and then
# score the testing data.
from sklearn.svm import SVC
import numpy as np
# a naive, best-parameter search using nested for-loops.
best_score = 0
for c in np.arange(0.05,2,0.05):
    for gamma in np.arange(0.001, 0.1, 0.001):
        svc = SVC(kernel='rbf', C=c, gamma=gamma)
        svc.fit(X_train, y_train)
        score = svc.score(X_test, y_test)
        if score > best_score:
            best_score = score
            print ("Score:", score, "C= ", c, "gamma = ", gamma)
"""
Classify UCI's wheat-seeds dataset.
First, benchmark how long SVC takes to train and predict relative to how long K-Neighbors took to train and test,
and then compare the decision boundary plots produced by the two.
"""
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
#
# INFO: Your Parameters.
# You can adjust them
C = 1
kernel = 'linear'
iterations = 10
#
# INFO: You can set this to false if you want to
# draw the full square matrix
FAST_DRAW = True
def drawPlots(model, X_train, X_test, y_train, y_test, wintitle='Figure 1'):
    # INFO: A convenience function
    # You can use this to break any higher-dimensional space down
    # and view cross sections of it.

    # If this line throws an error, use plt.style.use('ggplot') instead
    mpl.style.use('ggplot') # Look Pretty

    padding = 3
    resolution = 0.5
    max_2d_score = 0
    score = 0

    y_colors = ['#ff0000', '#00ff00', '#0000ff']
    my_cmap = mpl.colors.ListedColormap(['#ffaaaa', '#aaffaa', '#aaaaff'])
    colors = [y_colors[i] for i in y_train]
    num_columns = len(X_train.columns)

    fig = plt.figure()
    fig.canvas.set_window_title(wintitle)

    cnt = 0
    for col in range(num_columns):
        for row in range(num_columns):
            # Easy out
            if FAST_DRAW and col > row:
                cnt += 1
                continue

            ax = plt.subplot(num_columns, num_columns, cnt + 1)
            plt.xticks(())
            plt.yticks(())

            # Intersection:
            if col == row:
                plt.text(0.5, 0.5, X_train.columns[row], verticalalignment='center', horizontalalignment='center', fontsize=12)
                cnt += 1
                continue

            # Only select two features to display, then train the model
            X_train_bag = X_train.ix[:, [row,col]]
            X_test_bag = X_test.ix[:, [row,col]]
            model.fit(X_train_bag, y_train)

            # Create a mesh to plot in
            x_min, x_max = X_train_bag.ix[:, 0].min() - padding, X_train_bag.ix[:, 0].max() + padding
            y_min, y_max = X_train_bag.ix[:, 1].min() - padding, X_train_bag.ix[:, 1].max() + padding
            xx, yy = np.meshgrid(np.arange(x_min, x_max, resolution),
                                 np.arange(y_min, y_max, resolution))

            # Plot Boundaries
            plt.xlim(xx.min(), xx.max())
            plt.ylim(yy.min(), yy.max())

            # Prepare the contour
            Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
            Z = Z.reshape(xx.shape)
            plt.contourf(xx, yy, Z, cmap=my_cmap, alpha=0.8)
            plt.scatter(X_train_bag.ix[:, 0], X_train_bag.ix[:, 1], c=colors, alpha=0.5)

            score = round(model.score(X_test_bag, y_test) * 100, 3)
            plt.text(0.5, 0, "Score: {0}".format(score), transform = ax.transAxes, horizontalalignment='center', fontsize=8)
            max_2d_score = score if score > max_2d_score else max_2d_score

            cnt += 1

    print ("Max 2D Score: ", max_2d_score)
    fig.set_tight_layout(True)
def benchmark(model, X_train, X_test, y_train, y_test, wintitle='Figure 1'):
    print ('\n\n' + wintitle + ' Results')

    # The only purpose of doing many iterations is to get a more accurate
    # count of the time it takes for each classifier.
    s = time.time()
    for i in range(iterations):
        #
        # : train the classifier on the training data / labels:
        #
        model.fit(X_train, y_train)
    print ("{0} Iterations Training Time: ".format(iterations), time.time() - s)

    scoreBch = 0
    s = time.time()
    for i in range(iterations):
        #
        # : score the classifier on the testing data / labels:
        #
        scoreBch = model.score(X_test, y_test)
    print ("{0} Iterations Scoring Time: ".format(iterations), time.time() - s)
    print ("High-Dimensionality Score: ", round((scoreBch*100), 3))
#
# : Load up the wheat dataset into dataframe 'X'
#
df = pd.read_csv("Datasets/wheat.data", index_col='id')
# INFO: An easy way to show which rows have nans in them
print (df[pd.isnull(df).any(axis=1)])
#
# : Go ahead and drop any row with a nan
#
df.dropna(axis=0, inplace=True)
#
# INFO: Alternatively, you might try setting the nan values to the mean value
# of that column. Now that you have the labels, the mean should be calculated
# only within the specific class rather than across all classes (see the
# sketch after this comment block).
#
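# A minimal sketch of that per-class imputation, as an alternative to the
# dropna() above (left switched off here; flip the flag and skip the dropna
# to try it -- after dropna() there are no NaNs left to fill):
imputePerClass = False
if imputePerClass:
    for col in df.columns.drop('wheat_type'):
        df[col] = df.groupby('wheat_type')[col].transform(lambda s: s.fillna(s.mean()))
#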
# : Copy the labels out of the dataframe into variable 'y', then remove
# them from X. Encode the labels -- canadian:0, kama:1, and rosa:2
#
labels = df.wheat_type.copy() # copy “y” values out
df.drop(['wheat_type'], axis=1, inplace=True) # drop output column
labels = labels.map({'canadian':0, 'kama':1, 'rosa':2})
#
# : Split data into test / train sets
#
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df, labels, test_size=0.3,
random_state=7)
#
# : Create an KNeighbors classifier
#
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5)
benchmark(knn, X_train, X_test, y_train, y_test, 'KNeighbors')
drawPlots(knn, X_train, X_test, y_train, y_test, 'KNeighbors')
#
# : Create an SVC classifier
# Use a linear kernel, and set the C value to C
#
from sklearn.svm import SVC
svc = SVC(kernel='linear', C=C)
benchmark(svc, X_train, X_test, y_train, y_test, 'SVC')
drawPlots(svc, X_train, X_test, y_train, y_test, 'SVC')
plt.show()
"""
SVC in high dimensions, even with a provided kernel, still attempts to find
the hyperplane that best separates your classes (in the kernel-induced feature space).
If you have 'dirty' features thrown into the mix, it's entirely possible they
will end up hurting your overall SVC performance, compared to just having a
few really good features.
KNeighbors Results
5000 Iterations Training Time: 1.88873505592
5000 Iterations Scoring Time: 3.78048992157
High-Dimensionality Score: 83.607
Max 2D Score: 90.164
SVC Results
5000 Iterations Training Time: 3.79915714264
5000 Iterations Scoring Time: 1.65462088585
High-Dimensionality Score: 86.885
Max 2D Score: 93.443
"""