'''
Created on Jul 15, 2019
@author: Jesus Doménech
@email: jdomenec@ucm.es
'''
# TO INSTALL: run the following command
# pip3 install -U scikit-learn numpy
# - if you want to plot, you will also need matplotlib:
# pip3 install -U matplotlib
# TO RUN: see the bottom of the file
import numpy as np
from sklearn.svm import SVC, LinearSVC
BAD_CLASS = -1000
GOOD_CLASS = 1000

def split(good_points, bad_points):
    bad_y = [BAD_CLASS] * len(bad_points)
    good_y = [GOOD_CLASS] * len(good_points)
    X = np.array(bad_points + good_points)
    Y = np.array(bad_y + good_y)
    # Choose which classifier you want to use.
    ### 1 - Perceptron
    # clf = Perceptron()
    ### 2 - Linear SVM
    # clf = LinearSVC()
    ### 3 - SVM with a linear kernel
    clf = SVC(kernel="linear")
    clf.fit(X, Y)
    line = []
    try:
        line.append(clf.intercept_[0])
        for i in range(len(clf.coef_[0])):
            line.append(clf.coef_[0][i])
        # You can call plot to draw the separating line together with
        # the good and bad points; it only works in dimension 2.
        plot(clf, X, Y)
        return line
    except (TypeError, AttributeError):
        # coef_ is only available for linear classifiers.
        return None
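
# A small usage sketch (an addition, not part of the original gist): the list
# returned by split can be read as the hyperplane
#     line[0] + line[1]*x1 + ... + line[n]*xn = 0,
# so the sign of that expression tells you on which side a point lies.
# The helper name `side_of` is purely illustrative.
def side_of(line, point):
    """Return the sign of the hyperplane expression at `point` (assuming
    the sklearn convention coef_ . x + intercept_; positive values fall
    on the GOOD_CLASS side)."""
    value = line[0] + sum(c * x for c, x in zip(line[1:], point))
    return 1 if value >= 0 else -1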

def plot(classifier, X, Y):
    if len(X) == 0 or len(X[0]) != 2:
        return
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # Plotting is optional; skip silently if matplotlib is missing.
        return
    w = classifier.coef_[0]
    if w[1] == 0:
        # Vertical separating line: x1 = -intercept / w[0]
        yy = np.linspace(min([Xi[1] for Xi in X]) - 2, max([Xi[1] for Xi in X]) + 2)
        xx = [-(classifier.intercept_[0]) / w[0]] * len(yy)
    else:
        xx = np.linspace(min([Xi[0] for Xi in X]) - 2, max([Xi[0] for Xi in X]) + 2)
        a = -w[0] / w[1]
        yy = a * xx - (classifier.intercept_[0]) / w[1]
    plt.plot(xx, yy, 'k-')
    plt.scatter(X[:, 0], X[:, 1], c=Y)
    plt.axis('tight')
    plt.show()
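
# Note: plt.show() opens an interactive window; in a headless environment
# you could replace it with plt.savefig('split.png'), a standard
# matplotlib call, to write the figure to a file instead.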

class Perceptron(object):
    """Perceptron classifier.

    Parameters
    ------------
    eta : float
        Learning rate (between 0.0 and 1.0).
    n_iter : int
        Passes over the training dataset.

    Attributes
    -----------
    w_ : 1d-array
        Weights after fitting.
    errors_ : list
        Number of misclassifications in every epoch.
    """

    def __init__(self, eta=0.2, n_iter=100):
        self.eta = eta
        self.n_iter = n_iter
    def fit(self, X, y):
        """Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : object
        """
        # predict() returns labels in {-1, 1}, so map the targets
        # (e.g. GOOD_CLASS/BAD_CLASS) onto that range first; otherwise
        # the update is never zero and convergence is never detected.
        y = np.where(np.asarray(y) > 0, 1, -1)
        self.w_ = np.zeros(1 + X.shape[1])
        self.errors_ = []
        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        # Expose the result with the same attributes (and the same sign
        # convention, coef_ . x + intercept_) as the sklearn classifiers,
        # so that split and plot can use any of the three interchangeably.
        self.coef_ = [list(self.w_[1:])]
        self.intercept_ = [self.w_[0]]
        return self
    def net_input(self, X):
        """Calculate the net input."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return the class label after the unit step."""
        return np.where(self.net_input(X) >= 0.0, 1, -1)
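
# A minimal sanity check for the Perceptron above (illustrative only;
# these values are an assumption of this sketch, not output of the gist):
#
#     p = Perceptron(eta=0.2, n_iter=10)
#     p.fit(np.array([[-1.0], [1.0]]), np.array([BAD_CLASS, GOOD_CLASS]))
#     assert p.predict(np.array([2.0])) == 1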
# DON'T REMOVE THIS LINE
if __name__ == '__main__':
    # These lines run when you execute the command:
    # python3 classify.py
    # 1 - Generate good and bad points
    goods = [[i, -10] for i in range(1000)]
    bads = [[i, 10] for i in range(1000)]
    # 2 - Call split
    line = split(goods, bads)
    # 3 - Analyze the output.
    # The first position is the independent term;
    # the rest are the coefficients.
    print(line)
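    # For this example the classes are separated by the horizontal line
    # x2 = 0, so the printed list should look roughly like
    # [0.0, 0.0, -c] for some c > 0 (a sketch of the expected shape,
    # up to scaling, not an exact value).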