DavidSanf0rd/DMC.py

## DMC.py
import sklearn
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

import numpy as np
from sklearn import datasets
from sklearn.neighbors import NearestCentroid

n_neighbors = 15

iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features. We could
                      # avoid this ugly slicing by using a two-dim dataset
y = iris.target

h = .02  # step size in the mesh

cmap_light =[[0, '#FFAAAA'], [0.5, '#AAFFAA'], [1, '#AAAAFF']]
cmap_bold = [[0, '#FF0000'], [0.5, '#00FF00'], [1, '#0000FF']]

data = []
titles = []
i = 0

for shrinkage in [None, .2]:
    clf = NearestCentroid(shrink_threshold=shrinkage)
    clf.fit(X, y)
    y_pred = clf.predict(X)
    print(shrinkage, np.mean(y == y_pred))
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    x_ = np.arange(x_min, x_max, h)
    y_ = np.arange(y_min, y_max, h)
    xx, yy = np.meshgrid(x_, y_)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    Z = Z.reshape(xx.shape)
    data.append([])
    p1 = go.Heatmap(x=x_, y=y_, z=Z,
                    showscale=False,
                    colorscale=cmap_light)

    p2 = go.Scatter(x=X[:, 0], y=X[:, 1],
                    mode='markers',
                    marker=dict(color=X[:, 0],
                                colorscale=cmap_bold,
                                line=dict(color='black', width=1)))
    data[i].append(p1)
    data[i].append(p2)
    titles.append("3-Class classification (shrink_threshold=%r)"
                   % shrinkage)
    i+=1

fig = tools.make_subplots(rows=1, cols=2,
                          subplot_titles=tuple(titles),
                          print_grid=False)

for i in range(0, len(data)):
    for j in range(0, len(data[i])):
        fig.append_trace(data[i][j], 1, i+1)

fig['layout'].update(height=700, hovermode='closest',
                     showlegend=False)


py.iplot(fig)

## KNN.py
import math
from csv import reader
from random import random, randint
import operator


    def load_and_parse(self):
        with open(self.filename, 'rb') as data_set:
            lines = reader(data_set)
            data_set = list(lines)
            for x in range(len(data_set) - 1):
                for y in range(0, 4, 1):
                    data_set[x][y] = float(data_set[x][y])
                if random() < self.ratio:
                    self.training_set.append(data_set[x])
                else:
                    self.test_set.append(data_set[x])

    """
    Returns a random test point for future prediction
    """
    def random_test_point(self):
        return self.test_set[randint(0, len(self.test_set) - 1)]

    """
    Calculates similarity using Euclidean distance
    """
    @staticmethod
    def distance(a, b, size):
        d = 0
        for x in range(size):
            d += pow((a[x] - b[x]), 2)
        return math.sqrt(d)

    """
    Finds the k most similar instance to the given test point
    """
    def get_neighbors(self, test_point):
        distances = []
        size = len(test_point) - 1
        for x in range(len(self.training_set)):
            dist = self.distance(test_point, self.training_set[x], size)
            distances.append((self.training_set[x], dist))
        distances.sort(key=operator.itemgetter(1))
        neighbors = []
        for x in range(self.k):
            neighbors.append(distances[x][0])
        return neighbors

    """
    Returns the class to which a test point belongs to based on its neighbors
    """
    @staticmethod
    def get_class(neighbors):
        votes = {}
        for x in range(len(neighbors)):
            klass = neighbors[x][-1]
            if klass in votes:
                votes[klass] += 1
            else:
                votes[klass] = 1
        sorted_votes = sorted(votes.iteritems(), key=operator.itemgetter(1), reverse=True)
        return sorted_votes[0][0]

    """
    Predicts the class of a given test point
    """
    def predict(self, test_point):
        neighbors = self.get_neighbors(test_point)
        prediction = self.get_class(neighbors)
        return prediction


knn = KNN(k=3, filename='iris.csv', ratio=0.7)
knn.load_and_parse()
test_point = knn.random_test_point()
print test_point
print knn.predict(test_point)
	import sklearn
	import plotly.plotly as py
	import plotly.graph_objs as go
	from plotly import tools

	import numpy as np
	from sklearn import datasets
	from sklearn.neighbors import NearestCentroid

	n_neighbors = 15

	iris = datasets.load_iris()
	X = iris.data[:, :2] # we only take the first two features. We could
	# avoid this ugly slicing by using a two-dim dataset
	y = iris.target

	h = .02 # step size in the mesh

	cmap_light =[[0, '#FFAAAA'], [0.5, '#AAFFAA'], [1, '#AAAAFF']]
	cmap_bold = [[0, '#FF0000'], [0.5, '#00FF00'], [1, '#0000FF']]

	data = []
	titles = []
	i = 0

	for shrinkage in [None, .2]:
	clf = NearestCentroid(shrink_threshold=shrinkage)
	clf.fit(X, y)
	y_pred = clf.predict(X)
	print(shrinkage, np.mean(y == y_pred))
	# Plot the decision boundary. For that, we will assign a color to each
	# point in the mesh [x_min, x_max]x[y_min, y_max].
	x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
	y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
	x_ = np.arange(x_min, x_max, h)
	y_ = np.arange(y_min, y_max, h)
	xx, yy = np.meshgrid(x_, y_)
	Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

	Z = Z.reshape(xx.shape)
	data.append([])
	p1 = go.Heatmap(x=x_, y=y_, z=Z,
	showscale=False,
	colorscale=cmap_light)

	p2 = go.Scatter(x=X[:, 0], y=X[:, 1],
	mode='markers',
	marker=dict(color=X[:, 0],
	colorscale=cmap_bold,
	line=dict(color='black', width=1)))
	data[i].append(p1)
	data[i].append(p2)
	titles.append("3-Class classification (shrink_threshold=%r)"
	% shrinkage)
	i+=1

	fig = tools.make_subplots(rows=1, cols=2,
	subplot_titles=tuple(titles),
	print_grid=False)

	for i in range(0, len(data)):
	for j in range(0, len(data[i])):
	fig.append_trace(data[i][j], 1, i+1)

	fig['layout'].update(height=700, hovermode='closest',
	showlegend=False)


	py.iplot(fig)
	import math
	from csv import reader
	from random import random, randint
	import operator



	def load_and_parse(self):
	with open(self.filename, 'rb') as data_set:
	lines = reader(data_set)
	data_set = list(lines)
	for x in range(len(data_set) - 1):
	for y in range(0, 4, 1):
	data_set[x][y] = float(data_set[x][y])
	if random() < self.ratio:
	self.training_set.append(data_set[x])
	else:
	self.test_set.append(data_set[x])

	"""
	Returns a random test point for future prediction
	"""
	def random_test_point(self):
	return self.test_set[randint(0, len(self.test_set) - 1)]

	"""
	Calculates similarity using Euclidean distance
	"""
	@staticmethod
	def distance(a, b, size):
	d = 0
	for x in range(size):
	d += pow((a[x] - b[x]), 2)
	return math.sqrt(d)

	"""
	Finds the k most similar instance to the given test point
	"""
	def get_neighbors(self, test_point):
	distances = []
	size = len(test_point) - 1
	for x in range(len(self.training_set)):
	dist = self.distance(test_point, self.training_set[x], size)
	distances.append((self.training_set[x], dist))
	distances.sort(key=operator.itemgetter(1))
	neighbors = []
	for x in range(self.k):
	neighbors.append(distances[x][0])
	return neighbors

	"""
	Returns the class to which a test point belongs to based on its neighbors
	"""
	@staticmethod
	def get_class(neighbors):
	votes = {}
	for x in range(len(neighbors)):
	klass = neighbors[x][-1]
	if klass in votes:
	votes[klass] += 1
	else:
	votes[klass] = 1
	sorted_votes = sorted(votes.iteritems(), key=operator.itemgetter(1), reverse=True)
	return sorted_votes[0][0]

	"""
	Predicts the class of a given test point
	"""
	def predict(self, test_point):
	neighbors = self.get_neighbors(test_point)
	prediction = self.get_class(neighbors)
	return prediction


	knn = KNN(k=3, filename='iris.csv', ratio=0.7)
	knn.load_and_parse()
	test_point = knn.random_test_point()
	print test_point
	print knn.predict(test_point)