# RichardKelley/ml_utils.py

## ml_utils.py
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from matplotlib.patches import Ellipse
from math import sqrt

def build_dataset(n_samples, n_features, n_classes=1):
    '''
    Generate a synthetic dataset with sklearn's make_classification.

    Every feature is informative (no redundant or repeated features),
    each class is drawn from a single cluster, and class_sep is fixed at 2.

    @arg n_samples - how many points to generate.
    @arg n_features - the dimensionality of each point.
    @arg n_classes - how many classes to generate.
                     Defaults to 1 for "unlabeled" data.

    returns (x,y), where

    x is a numpy array with x.shape = (n_samples, n_features).
    y is a numpy array with y.shape = n_samples.
    '''
    options = dict(
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_informative=n_features,  # all features carry class information
        n_redundant=0,
        n_repeated=0,
        n_clusters_per_class=1,
        class_sep=2,
    )
    return datasets.make_classification(**options)


def plot_dataset(dataset):
    '''
    Scatter-plot the first two feature columns of a dataset.

    @arg dataset - either an (x,y) tuple, in which case the points x are
                   colored by their labels y using the 'cool' colormap,
                   or a bare ndarray of points, which is drawn in
                   matplotlib's default color.

    Any other argument type is reported with a printed message and
    nothing is drawn. Returns None.
    '''
    if isinstance(dataset, tuple):
        points, labels = dataset[0], dataset[1]
        plt.scatter(points[:, 0], points[:, 1], c=labels, cmap='cool', edgecolors="Black")
    elif isinstance(dataset, np.ndarray):
        # There are no `c` values to map here, so no colormap is passed:
        # matplotlib ignores `cmap` without `c` and only emits a warning.
        plt.scatter(dataset[:, 0], dataset[:, 1], edgecolors="Black")
    else:
        print("Argument dataset should be an (x,y) tuple or an ndarray.")

def plot_classification(dataset, model):
    '''
    Draw the model's decision regions with the dataset plotted on top.

    @arg dataset - an (x,y) tuple; the first two feature columns of x
                   determine the extent of the plot.
    @arg model - object whose forward(pt) returns a value with an
                 argmax() method; that index is used as the class
                 of the mesh point pt.

    Opens a new figure. Returns None.
    '''
    step = 0.05  # spacing between neighbouring mesh points
    points = dataset[0]
    first, second = points[:, 0], points[:, 1]

    # Pad the bounding box of the data by one unit on every side.
    x_min, x_max = first.min() - 1, first.max() + 1
    y_min, y_max = second.min() - 1, second.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Classify every mesh point, one call to the model per point.
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    Z = np.asarray([model.forward(pt).argmax() for pt in mesh_points])
    Z = Z.reshape(xx.shape)

    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap='cool')
    plot_dataset(dataset)

def compute_accuracy(model, dataset):
    '''
    Count how many samples the model classifies correctly.

    @arg model - object whose forward(sample) returns a value with an
                 argmax() method; that index is the predicted label.
    @arg dataset - an (x,y) pair: x holds one sample per row and y the
                   label of each sample.

    returns (correct, total), the number of samples whose predicted
    label equals the stored label and the number of samples in x.
    '''
    samples = dataset[0]
    correct = sum(
        1
        for i in range(samples.shape[0])
        if model.forward(samples[i]).argmax() == dataset[1][i]
    )
    return (correct, samples.shape[0])

def draw_gaussian(mean, cov):
    '''
    Draw a 2-D Gaussian on the current axes: its mean as an orange point
    and its 95% confidence ellipse as an unfilled outline.

    @arg mean - the center of the Gaussian; mean[0] and mean[1] are
                used as the x and y coordinates.
    @arg cov - the 2x2 covariance matrix. It is treated as symmetric
               (only its lower triangle is read by the eigen-solver).

    Returns None.
    '''
    # 95% quantile of the chi-square distribution with 2 degrees of
    # freedom (-2 * ln(0.05)); scales the variances to the 95% contour.
    chi2_95 = 5.991

    ax = plt.gca()

    plt.scatter(mean[0], mean[1], c="Orange")

    # eigh is the solver for symmetric matrices: it returns real
    # eigenvalues in ascending order, with the matching eigenvectors
    # stored as the *columns* of the second result.
    eigenvalues, eigenvectors = np.linalg.eigh(cov)
    minor_var, major_var = eigenvalues[0], eigenvalues[1]
    major_axis = eigenvectors[:, 1]

    # Clamp at zero so round-off in a (near-)singular covariance cannot
    # make a variance slightly negative and crash sqrt.
    width = 2.0 * sqrt(chi2_95 * max(major_var, 0.0))
    height = 2.0 * sqrt(chi2_95 * max(minor_var, 0.0))

    # An Ellipse's width lies along the x-axis before rotation, so rotate
    # it onto the direction of the major eigenvector.
    angle = np.rad2deg(np.arctan2(major_axis[1], major_axis[0]))

    # `angle` must be passed by keyword: recent matplotlib releases no
    # longer accept it positionally.
    ax.add_patch(Ellipse((mean[0], mean[1]),
                         width,
                         height,
                         angle=angle,
                         linewidth=1,
                         fill=False))
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn import datasets
	from matplotlib.patches import Ellipse
	from math import sqrt

	def build_dataset(n_samples, n_features, n_classes = 1):
	    '''
	    Create a dataset with sklearn's make_classification.

	    Every feature is informative (no redundant or repeated features),
	    each class is drawn from a single cluster, and class_sep is fixed at 2.

	    @arg n_samples - the number of points to generate.
	    @arg n_features - the dimensionality of the points
	    @arg n_classes - the number of classes to generate.
	                     Defaults to 1 for "unlabeled" data.

	    returns (x,y), where

	    x is a numpy array with x.shape = (n_samples, n_features).
	    y is a numpy array with y.shape = n_samples.
	    '''
	    return datasets.make_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes,
	                                        n_informative=n_features, n_redundant=0, n_repeated=0,
	                                        n_clusters_per_class=1, class_sep=2)



	def plot_dataset(dataset):
	    '''
	    Scatter-plot the first two feature columns of a dataset.

	    @arg dataset - either an (x,y) tuple, in which case the points x are
	                   colored by their labels y using the 'cool' colormap,
	                   or a bare ndarray of points, which is drawn in
	                   matplotlib's default color.

	    Any other argument type is reported with a printed message and
	    nothing is drawn. Returns None.
	    '''
	    if isinstance(dataset, tuple):
	        plt.scatter(dataset[0][:, 0], dataset[0][:, 1], c=dataset[1], cmap='cool', edgecolors="Black")
	    elif isinstance(dataset, np.ndarray):
	        # There are no `c` values to map here, so no colormap is passed:
	        # matplotlib ignores `cmap` without `c` and only emits a warning.
	        plt.scatter(dataset[:, 0], dataset[:, 1], edgecolors="Black")
	    else:
	        print("Argument dataset should be an (x,y) tuple or an ndarray.")

	def plot_classification(dataset, model):
	    '''
	    Draw the model's decision regions with the dataset plotted on top.

	    @arg dataset - an (x,y) tuple; the first two feature columns of x
	                   determine the extent of the plot.
	    @arg model - object whose forward(pt) returns a value with an
	                 argmax() method; that index is used as the class
	                 of the mesh point pt.

	    Opens a new figure. Returns None.
	    '''
	    h = 0.05 # mesh size
	    # Pad the bounding box of the data by one unit on every side.
	    x_min, x_max = dataset[0][:, 0].min() - 1, dataset[0][:, 0].max() + 1
	    y_min, y_max = dataset[0][:, 1].min() - 1, dataset[0][:, 1].max() + 1
	    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

	    # Classify every mesh point, one call to the model per point.
	    Z = [model.forward(pt).argmax() for pt in np.c_[xx.ravel(), yy.ravel()]]
	    Z = np.asarray(Z)
	    Z = Z.reshape(xx.shape)

	    plt.figure()
	    plt.pcolormesh(xx, yy, Z, cmap='cool')
	    plot_dataset(dataset)

	def compute_accuracy(model, dataset):
	    '''
	    Count how many samples the model classifies correctly.

	    @arg model - object whose forward(sample) returns a value with an
	                 argmax() method; that index is the predicted label.
	    @arg dataset - an (x,y) pair: x holds one sample per row and y the
	                   label of each sample.

	    returns (correct, total), the number of samples whose predicted
	    label equals the stored label and the number of samples in x.
	    '''
	    correct = 0
	    for i in range(dataset[0].shape[0]):
	        if model.forward(dataset[0][i]).argmax() == dataset[1][i]:
	            correct += 1

	    return (correct, dataset[0].shape[0])

	def draw_gaussian(mean, cov):
	    '''
	    Draw a 2-D Gaussian on the current axes: its mean as an orange point
	    and its 95% confidence ellipse as an unfilled outline.

	    @arg mean - the center of the Gaussian; mean[0] and mean[1] are
	                used as the x and y coordinates.
	    @arg cov - the 2x2 covariance matrix. It is treated as symmetric
	               (only its lower triangle is read by the eigen-solver).

	    Returns None.
	    '''
	    # 95% quantile of the chi-square distribution with 2 degrees of
	    # freedom (-2 * ln(0.05)); scales the variances to the 95% contour.
	    chi2_95 = 5.991

	    ax = plt.gca()

	    plt.scatter(mean[0], mean[1], c="Orange")

	    # eigh is the solver for symmetric matrices: it returns real
	    # eigenvalues in ascending order, with the matching eigenvectors
	    # stored as the *columns* of the second result.
	    eigenvalues, eigenvectors = np.linalg.eigh(cov)
	    minor_var, major_var = eigenvalues[0], eigenvalues[1]
	    major_axis = eigenvectors[:, 1]

	    # Clamp at zero so round-off in a (near-)singular covariance cannot
	    # make a variance slightly negative and crash sqrt.
	    width = 2.0 * sqrt(chi2_95 * max(major_var, 0.0))
	    height = 2.0 * sqrt(chi2_95 * max(minor_var, 0.0))

	    # An Ellipse's width lies along the x-axis before rotation, so rotate
	    # it onto the direction of the major eigenvector.
	    angle = np.rad2deg(np.arctan2(major_axis[1], major_axis[0]))

	    # `angle` must be passed by keyword: recent matplotlib releases no
	    # longer accept it positionally.
	    ax.add_patch(Ellipse((mean[0], mean[1]),
	                         width,
	                         height,
	                         angle=angle,
	                         linewidth=1,
	                         fill=False))