Last active
October 18, 2017 22:58
-
-
Save walkingpendulum/fb8f8bf1f77780bbc335429a4802cde8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from itertools import combinations | |
# Training ("learn") samples for class A: 20 rows x 4 numeric features.
# NOTE(review): the values look like Iris flower measurements - confirm source.
Alearn = np.array([
    [5.1, 3.5, 1.4, 0.2],
    [4.9, 3.0, 1.4, 0.2],
    [4.7, 3.2, 1.3, 0.2],
    [4.6, 3.1, 1.5, 0.2],
    [5.0, 3.6, 1.4, 0.2],
    [5.4, 3.9, 1.7, 0.4],
    [4.6, 3.4, 1.4, 0.3],
    [5.0, 3.4, 1.5, 0.2],
    [4.4, 2.9, 1.4, 0.2],
    [4.9, 3.1, 1.5, 0.1],
    [5.4, 3.7, 1.5, 0.2],
    [4.8, 3.4, 1.6, 0.2],
    [4.8, 3.0, 1.4, 0.1],
    [4.3, 3.0, 1.1, 0.1],
    [5.8, 4.0, 1.2, 0.2],
    [5.7, 4.4, 1.5, 0.4],
    [5.4, 3.9, 1.3, 0.4],
    [5.1, 3.5, 1.4, 0.3],
    [5.7, 3.8, 1.7, 0.3],
    [5.1, 3.8, 1.5, 0.3],
])
# Held-out ("exam") samples for class A: 30 rows x 4 numeric features,
# used only to measure the error rate, never for fitting.
Aexam = np.array([
    [5.4, 3.4, 1.7, 0.2],
    [5.1, 3.7, 1.5, 0.4],
    [4.6, 3.6, 1.0, 0.2],
    [5.1, 3.3, 1.7, 0.5],
    [4.8, 3.4, 1.9, 0.2],
    [5.0, 3.0, 1.6, 0.2],
    [5.0, 3.4, 1.6, 0.4],
    [5.2, 3.5, 1.5, 0.2],
    [5.2, 3.4, 1.4, 0.2],
    [4.7, 3.2, 1.6, 0.2],
    [4.8, 3.1, 1.6, 0.2],
    [5.4, 3.4, 1.5, 0.4],
    [5.2, 4.1, 1.5, 0.1],
    [5.5, 4.2, 1.4, 0.2],
    [4.9, 3.1, 1.5, 0.2],
    [5.0, 3.2, 1.2, 0.2],
    [5.5, 3.5, 1.3, 0.2],
    [4.9, 3.6, 1.4, 0.1],
    [4.4, 3.0, 1.3, 0.2],
    [5.1, 3.4, 1.5, 0.2],
    [5.0, 3.5, 1.3, 0.3],
    [4.5, 2.3, 1.3, 0.3],
    [4.4, 3.2, 1.3, 0.2],
    [5.0, 3.5, 1.6, 0.6],
    [5.1, 3.8, 1.9, 0.4],
    [4.8, 3.0, 1.4, 0.3],
    [5.1, 3.8, 1.6, 0.2],
    [4.6, 3.2, 1.4, 0.2],
    [5.3, 3.7, 1.5, 0.2],
    [5.0, 3.3, 1.4, 0.2],
])
# Training ("learn") samples for class B: 20 rows x 4 numeric features.
Blearn = np.array([
    [7.0, 3.2, 4.7, 1.4],
    [6.4, 3.2, 4.5, 1.5],
    [6.9, 3.1, 4.9, 1.5],
    [5.5, 2.3, 4.0, 1.3],
    [6.5, 2.8, 4.6, 1.5],
    [5.7, 2.8, 4.5, 1.3],
    [6.3, 3.3, 4.7, 1.6],
    [4.9, 2.4, 3.3, 1.0],
    [6.6, 2.9, 4.6, 1.3],
    [5.2, 2.7, 3.9, 1.4],
    [5.0, 2.0, 3.5, 1.0],
    [5.9, 3.0, 4.2, 1.5],
    [6.0, 2.2, 4.0, 1.0],
    [6.1, 2.9, 4.7, 1.4],
    [5.6, 2.9, 3.6, 1.3],
    [6.7, 3.1, 4.4, 1.4],
    [5.6, 3.0, 4.5, 1.5],
    [5.8, 2.7, 4.1, 1.0],
    [6.2, 2.2, 4.5, 1.5],
    [5.6, 2.5, 3.9, 1.1],
])
# Held-out ("exam") samples for class B: 30 rows x 4 numeric features,
# used only to measure the error rate, never for fitting.
Bexam = np.array([
    [5.9, 3.2, 4.8, 1.8],
    [6.1, 2.8, 4.0, 1.3],
    [6.3, 2.5, 4.9, 1.5],
    [6.1, 2.8, 4.7, 1.2],
    [6.4, 2.9, 4.3, 1.3],
    [6.6, 3.0, 4.4, 1.4],
    [6.8, 2.8, 4.8, 1.4],
    [6.7, 3.0, 5.0, 1.7],
    [6.0, 2.9, 4.5, 1.5],
    [5.7, 2.6, 3.5, 1.0],
    [5.5, 2.4, 3.8, 1.1],
    [5.5, 2.4, 3.7, 1.0],
    [5.8, 2.7, 3.9, 1.2],
    [6.0, 2.7, 5.1, 1.6],
    [5.4, 3.0, 4.5, 1.5],
    [6.0, 3.4, 4.5, 1.6],
    [6.7, 3.1, 4.7, 1.5],
    [6.3, 2.3, 4.4, 1.3],
    [5.6, 3.0, 4.1, 1.3],
    [5.5, 2.5, 4.0, 1.3],
    [5.5, 2.6, 4.4, 1.2],
    [6.1, 3.0, 4.6, 1.4],
    [5.8, 2.6, 4.0, 1.2],
    [5.0, 2.3, 3.3, 1.0],
    [5.6, 2.7, 4.2, 1.3],
    [5.7, 3.0, 4.2, 1.2],
    [5.7, 2.9, 4.2, 1.3],
    [6.2, 2.9, 4.3, 1.3],
    [5.1, 2.5, 3.0, 1.1],
    [5.7, 2.8, 4.1, 1.3],
])
# Training ("learn") samples for class C: 20 rows x 4 numeric features.
Clearn = np.array([
    [6.3, 3.3, 6.0, 2.5],
    [5.8, 2.7, 5.1, 1.9],
    [7.1, 3.0, 5.9, 2.1],
    [6.3, 2.9, 5.6, 1.8],
    [6.5, 3.0, 5.8, 2.2],
    [7.6, 3.0, 6.6, 2.1],
    [4.9, 2.5, 4.5, 1.7],
    [7.3, 2.9, 6.3, 1.8],
    [6.7, 2.5, 5.8, 1.8],
    [7.2, 3.6, 6.1, 2.5],
    [6.5, 3.2, 5.1, 2.0],
    [6.4, 2.7, 5.3, 1.9],
    [6.8, 3.0, 5.5, 2.1],
    [5.7, 2.5, 5.0, 2.0],
    [5.8, 2.8, 5.1, 2.4],
    [6.4, 3.2, 5.3, 2.3],
    [6.5, 3.0, 5.5, 1.8],
    [7.7, 3.8, 6.7, 2.2],
    [7.7, 2.6, 6.9, 2.3],
    [6.0, 2.2, 5.0, 1.5],
])
# Held-out ("exam") samples for class C: 30 rows x 4 numeric features,
# used only to measure the error rate, never for fitting.
Cexam = np.array([
    [6.9, 3.2, 5.7, 2.3],
    [5.6, 2.8, 4.9, 2.0],
    [7.7, 2.8, 6.7, 2.0],
    [6.3, 2.7, 4.9, 1.8],
    [6.7, 3.3, 5.7, 2.1],
    [7.2, 3.2, 6.0, 1.8],
    [6.2, 2.8, 4.8, 1.8],
    [6.1, 3.0, 4.9, 1.8],
    [6.4, 2.8, 5.6, 2.1],
    [7.2, 3.0, 5.8, 1.6],
    [7.4, 2.8, 6.1, 1.9],
    [7.9, 3.8, 6.4, 2.0],
    [6.4, 2.8, 5.6, 2.2],
    [6.3, 2.8, 5.1, 1.5],
    [6.1, 2.6, 5.6, 1.4],
    [7.7, 3.0, 6.1, 2.3],
    [6.3, 3.4, 5.6, 2.4],
    [6.4, 3.1, 5.5, 1.8],
    [6.0, 3.0, 4.8, 1.8],
    [6.9, 3.1, 5.4, 2.1],
    [6.7, 3.1, 5.6, 2.4],
    [6.9, 3.1, 5.1, 2.3],
    [5.8, 2.7, 5.1, 1.9],
    [6.8, 3.2, 5.9, 2.3],
    [6.7, 3.3, 5.7, 2.5],
    [6.7, 3.0, 5.2, 2.3],
    [6.3, 2.5, 5.0, 1.9],
    [6.5, 3.0, 5.2, 2.0],
    [6.2, 3.4, 5.4, 2.3],
    [5.9, 3.0, 5.1, 1.8],
])
class NaiveBayesBinaryClassifier:
    """Two-class naive Bayes classifier over mean-thresholded features.

    Every numeric feature is turned into a boolean by comparing it with that
    feature's mean over the whole training set.  For each class the
    Laplace-smoothed frequency of "above the mean" is estimated per feature,
    and a sample is scored by the product of the matching per-feature
    probabilities.  ``predict`` returns the class with the larger score.

    Class priors are not part of the score, so the decision is a pure
    likelihood comparison.

    Args:
        X: 2-D array-like of training samples, one row per sample.
        y: 1-D array-like of labels; rows with label 0 form the first class
            and rows with label 1 form the second class.
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y
        # Filled in by fit().
        self.means = None
        self.first_class_regressor = None
        self.second_class_regressor = None

    def _fit_means(self):
        """Store the per-feature mean of the training data as the threshold."""
        # Use the instance's own data, not a module-level ``X``.
        self.means = np.asarray(self.X).mean(axis=0)

    def _make_regressor(self, current_class_representers_data):
        """Build a scoring function for one class.

        Args:
            current_class_representers_data: training rows that belong to the
                class being modelled.

        Returns:
            A callable mapping a 2-D sample array to a 1-D array holding, for
            each row, the product of the per-feature probabilities under this
            class.
        """
        bayesian_features = np.asarray(current_class_representers_data) > self.means
        # Laplace (add-one) smoothing keeps every probability strictly inside
        # (0, 1); float literals force true division on every interpreter.
        pos_probs = (bayesian_features.sum(axis=0) + 1.0) / (len(bayesian_features) + 2.0)
        neg_probs = 1.0 - pos_probs

        def regressor(X):
            above_mean = np.asarray(X) > self.means
            # Pick P(feature above mean) or its complement per cell.
            probs_matrix = np.where(above_mean, pos_probs, neg_probs)
            return probs_matrix.prod(axis=1)

        return regressor

    def fit(self):
        """Estimate the thresholds and both per-class scoring functions."""
        self._fit_means()
        samples = np.asarray(self.X)
        labels = np.asarray(self.y)
        self.first_class_regressor = self._make_regressor(samples[labels == 0])
        self.second_class_regressor = self._make_regressor(samples[labels == 1])

    def predict(self, X):
        """Return 0 or 1 for each row of ``X``; ties go to class 0.

        ``fit`` must have been called first.
        """
        first_probs = self.first_class_regressor(X).reshape((len(X), 1))
        second_probs = self.second_class_regressor(X).reshape((len(X), 1))
        probs = np.concatenate((first_probs, second_probs), axis=1)
        return np.argmax(probs, axis=1)
if __name__ == '__main__':
    # Train and evaluate one binary classifier for every unordered pair of
    # classes (A vs B, A vs C, B vs C) and report the misclassification rate
    # on both the training ("learn") and held-out ("exam") samples.
    datasets = [('A', Alearn, Aexam), ('B', Blearn, Bexam), ('C', Clearn, Cexam)]
    results = {}
    for first, second in combinations(datasets, 2):
        first_label, first_learn, first_exam = first
        second_label, second_learn, second_exam = second

        # The first class of the pair is labelled 0, the second 1.
        X = np.concatenate((first_learn, second_learn), axis=0)
        y = np.array([0] * len(first_learn) + [1] * len(second_learn))
        clf = NaiveBayesBinaryClassifier(X, y)
        clf.fit()

        exam = np.concatenate((first_exam, second_exam), axis=0)
        ground_truth = np.array([0] * len(first_exam) + [1] * len(second_exam))

        # Mean of the boolean mismatch mask is the error rate; it is always a
        # float, so no reliance on true division of integer counts.
        error_rate_learn = float(np.mean(clf.predict(X) != y))
        error_rate_exam = float(np.mean(clf.predict(exam) != ground_truth))

        results[' vs '.join((first_label, second_label))] = (
            'error rate learn={:.2f}, exam={:.2f}'.format(error_rate_learn, error_rate_exam)
        )

    for item in results.items():
        print("%s: %s" % item)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment