Created
March 9, 2017 12:57
-
-
Save Yuri-M-Dias/d7b69fd8bc873cdf6029af5dcb5a8a0f to your computer and use it in GitHub Desktop.
An N-nearest-neighbours (k-NN) and Naive Bayes implementation in Python 3.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import math | |
import operator | |
from sklearn import datasets | |
from sklearn import svm | |
import numpy as np | |
import pandas as pd | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.naive_bayes import GaussianNB | |
import matplotlib.pyplot as plt | |
from matplotlib.colors import ListedColormap | |
class DataMiningStrategy(object):
    """Duck-typed base class for the data-mining strategies.

    Subclasses are expected to override ``learnFromInput``; the version
    defined here only raises.
    """

    def learnFromInput(self, elements, compareTo):
        """Placeholder hook: always raises ``NotImplementedError``."""
        message = "Implement the learn from input!"
        raise NotImplementedError(message)
class NNearestImpl(DataMiningStrategy):
    """Nearest-neighbour lookup over two numeric attributes.

    Only the ``NEIGHBOURS`` (5) training rows closest to the query point are
    inspected.
    """

    # How many of the closest training rows learnFromInput looks at.
    NEIGHBOURS = 5

    def learnFromInput(self, elements, compareTo):
        """Return the distinct classes found among the nearest training rows.

        Args:
            elements: column objects. ``elements[0]`` and ``elements[1]`` hold
                the two numeric attributes and ``elements[-1]`` holds the
                class labels. Every column must provide ``getElements()``;
                the class column must also provide ``getNumberElements()``.
            compareTo: the query point as ``"<a1>|<a2>"``, e.g. ``"9.1|11.0"``.

        Returns:
            A list of class-name strings, ordered nearest-first and without
            duplicates. Fewer than ``NEIGHBOURS`` training rows is fine: all
            available rows are then considered.

        Raises:
            ValueError: if a coordinate cannot be converted to ``float``.
            IndexError: if ``compareTo`` has fewer than two ``|`` fields.
        """
        fields = compareTo.split("|")
        a1 = float(fields[0])
        a2 = float(fields[1])

        trainingClass = elements[-1]  # the class column
        firstAttribute = elements[0].getElements()
        secondAttribute = elements[1].getElements()
        labels = trainingClass.getElements()

        distances = []
        for row in range(trainingClass.getNumberElements()):
            b1 = float(firstAttribute[row])
            b2 = float(secondAttribute[row])
            distance = self.calculateDistance(a1, a2, b1, b2)
            distances.append(DistanceHelper(distance, a1, a2, b1, b2, labels[row]))

        # Sort the helpers by their ``distance`` attribute, closest first.
        distances.sort(key=operator.attrgetter('distance'))

        possibleelements = []
        # Slicing (rather than indexing 0..4) tolerates small training sets.
        for neighbour in distances[:self.NEIGHBOURS]:
            print(neighbour)
            # getClassName must be *called*; str() of the bound method would
            # yield a different garbage string for every neighbour.
            className = neighbour.getClassName()
            if className not in possibleelements:
                possibleelements.append(className)
        return possibleelements

    def calculateDistance(self, a1, a2, b1, b2):
        """Return the Euclidean distance between points (a1, a2) and (b1, b2).

        Kept as a separate method so it can later be replaced by a
        multi-dimensional implementation.
        """
        return math.sqrt((a1 - b1) ** 2 + (a2 - b2) ** 2)
class NaiveBayesImpl(DataMiningStrategy):
    """Naive Bayes prediction over categorical training columns."""

    def learnFromInput(self, elements, compareTo):
        """Predict the class of ``compareTo`` and return its name.

        For every known class ``Ci`` the score
        ``P(Ci) * prod_k P(a_k = v_k | Ci)`` is computed from the training
        columns; the class with the highest score wins. No smoothing is
        applied, so an attribute value never seen with a class zeroes that
        class's score.

        Args:
            elements: column objects; the last one is the class column and
                all preceding ones are attribute columns (for the 5-column
                training file this is the same layout the code previously
                hard-coded as ``elements[4]`` / ``elements[0..3]``).
            compareTo: the attribute values to classify, either as a
                ``"v1|v2|..."`` string (what ``input()`` delivers) or as a
                sequence with one value per attribute column.

        Returns:
            The name of the most probable class.

        Raises:
            IndexError: if ``compareTo`` has fewer values than there are
                attribute columns.
            ValueError: if the class column has no known classes.
        """
        # A raw "a|b|c" string must be tokenised; indexing it directly would
        # compare single characters against the attribute values.
        if isinstance(compareTo, str):
            compareTo = compareTo.split("|")

        classColumn = elements[-1]
        attributeColumns = elements[:-1]
        possibleelements = classColumn.getPossibleElements()
        priors = classColumn.calculateEachPossibleElemProb()  # P(Ci)

        bayesProbability = []
        for element in range(0, classColumn.getNumberPossibleElements()):
            priorprobability = priors[element]
            print(priorprobability)
            for x, attribute in enumerate(attributeColumns):
                # product over P(a_x = v_x | Ci)
                priorprobability *= self.calculateConditionalProbability(
                    attribute, classColumn,
                    str(compareTo[x]), str(possibleelements[element]))
            print(priorprobability, "2")
            bayesProbability.append(priorprobability)

        for x in bayesProbability:
            print(x)

        prediction = possibleelements[bayesProbability.index(max(bayesProbability))]
        print(prediction)
        return prediction

    def calculateConditionalProbability(self, numelem1, numelem2, compare1, compare2):
        """Return P(numelem1 == compare1 | numelem2 == compare2).

        Counts the training rows where both columns hold the given values and
        divides by the number of rows where ``numelem2`` holds ``compare2``.
        All rows of the columns are scanned (not a fixed 20).

        Raises:
            ValueError: if ``compare2`` is not a known value of ``numelem2``.
        """
        compare1 = str(compare1)
        compare2 = str(compare2)
        probabilityofTwo = sum(
            1
            for first, second in zip(numelem1.getElements(), numelem2.getElements())
            if first == compare1 and second == compare2
        )
        classIndex = numelem2.getPossibleElements().index(compare2)
        return probabilityofTwo / numelem2.getTimesElementAppeared()[classIndex]
class ElementToCalculate(object):
    """One training-set column: its values and per-distinct-value counts."""

    def __init__(self, name):
        """Create an empty column called ``name``."""
        self.name = name
        # Number of distinct values seen so far (== len(possibleelements)).
        self.numberOfPossibleElements = 0
        # Distinct values, as strings, in order of first appearance.
        self.possibleelements = []
        # Total number of values added (== len(elements)).
        self.numberofelements = 0
        # Every value added, as a string, in insertion order.
        self.elements = []
        # Occurrence counters, indexed like possibleelements. Starts with 50
        # zeroed slots (as before) and grows when more distinct values arrive.
        self.everyTimeElementAppeared = [0] * 50

    def getName(self):
        """Return the column name as a string."""
        return str(self.name)

    def getElements(self):
        """Return the list of all values added so far."""
        return self.elements

    def getNumberElements(self):
        """Return how many values have been added."""
        return int(self.numberofelements)

    def getPossibleElements(self):
        """Return the distinct values in order of first appearance."""
        return self.possibleelements

    def getNumberPossibleElements(self):
        """Return the number of distinct values."""
        return int(self.numberOfPossibleElements)

    def getTimesElementAppeared(self):
        """Return the occurrence counters, indexed like the distinct values."""
        return self.everyTimeElementAppeared

    def addElement(self, elementname):
        """Append one value to the column and update the counters.

        The value is normalised with ``str()`` *before* the membership test,
        so a non-string value (e.g. ``1``) is recognised as the already
        stored ``"1"`` instead of being registered as a duplicate.
        """
        value = str(elementname)
        try:
            slot = self.possibleelements.index(value)
        except ValueError:
            # Unknown value: register it as a new distinct value.
            slot = len(self.possibleelements)
            self.possibleelements.append(value)
            self.numberOfPossibleElements += 1
            # Grow the counter list past the pre-allocated slots if needed.
            while len(self.everyTimeElementAppeared) <= slot:
                self.everyTimeElementAppeared.append(0)
        self.elements.append(value)
        self.numberofelements += 1
        self.everyTimeElementAppeared[slot] += 1

    def calculateEachPossibleElemProb(self):
        """Return the relative frequency of each distinct value.

        The result is indexed like ``getPossibleElements()``; it is empty
        when nothing has been added yet.
        """
        return [
            self.everyTimeElementAppeared[pelement] / self.numberofelements
            for pelement in range(0, self.numberOfPossibleElements)
        ]
class DistanceHelper(object):
    """Record of one distance measurement between a query and a training point.

    Exists so the nearest-neighbour code can sort by ``distance`` and print a
    readable line per neighbour.
    """

    def __init__(self, distance, a1, a2, b1, b2, bClass):
        """Store the distance, both points and the training point's class."""
        super().__init__()
        self.distance = distance
        self.a1, self.a2 = a1, a2
        self.b1, self.b2 = b1, b2
        self.bClass = str(bClass)

    def __repr__(self):
        """Return a one-line human-readable description of the measurement."""
        pieces = [
            'Distance: ', str(self.distance),
            ' from ', str(self.a1), ', ', str(self.a2),
            ' and ', str(self.b1), ', ', str(self.b2),
            ' and with class: ', self.bClass,
        ]
        return ''.join(pieces)

    def getClassName(self):
        """Return the class label of the training point as a string."""
        label = self.bClass
        return str(label)
#-----Helper functions: print and read from the training set----- | |
def printAllElements(elements):
    """Print every column of the training set with its values and frequencies.

    For each column this writes a header line (name and value count), each
    stored value on its own line, and finally each distinct value together
    with its probability.
    """
    for column in elements:
        print(column.getName(), 'with', column.getNumberElements(), 'elements')
        for value in column.getElements():
            print(value)
        print('And possible elements:')
        frequencies = column.calculateEachPossibleElemProb()
        distinctValues = column.getPossibleElements()
        index = 0
        distinctCount = column.getNumberPossibleElements()
        while index < distinctCount:
            print(distinctValues[index], 'with probability of:', frequencies[index])
            index += 1
def readFileTokenizer(name):
    """Read ``<name>.txt`` and split every line on the ``|`` separator.

    Args:
        name: path of the file without its ``.txt`` extension.

    Returns:
        A list with one entry per line of the file; each entry is the list of
        ``|``-separated fields of that line (trailing newline removed).

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager closes the file even if reading raises.
    with open(name + '.txt', 'r') as handle:
        return [line.rstrip('\n').split('|') for line in handle]
def main():
    """Interactive entry point: load a training file, pick an algorithm, predict.

    Prompts for the training file name (without ``.txt``), builds one
    ``ElementToCalculate`` per header field, fills the columns from the
    remaining lines, asks which algorithm to use and finally passes the
    user's query string to that algorithm's ``learnFromInput``.
    """
    lines = readFileTokenizer(str(input('Digite o nome do arquivo de treinamento\n')))
    if not lines:
        # No header line means there are no columns to build.
        print("The training file is empty.")
        return

    header, rows = lines[0], lines[1:]
    elements = [ElementToCalculate(elementsnames) for elementsnames in header]
    for line in rows:
        if len(line) < len(elements):
            # Blank or truncated line (e.g. a trailing newline at end of
            # file); indexing it per column would raise IndexError.
            continue
        for x in range(0, len(elements)):
            elements[x].addElement(line[x])

    try:
        choice = int(input("Type 0 for Nth Nearest, or 1 for Naive Bayes\n"))
    except ValueError:
        choice = None  # non-numeric answer: handled like any other bad choice

    if choice == 0:
        dataMiningAlgorithm = NNearestImpl()
    elif choice == 1:
        dataMiningAlgorithm = NaiveBayesImpl()
    else:
        print("You didn't type 0 or 1, you liar! Run it again!")
        # Stop here: a bare ``exit`` expression does nothing, and continuing
        # would use an algorithm variable that was never assigned.
        return

    dataMiningAlgorithm.learnFromInput(elements, str(input("Type the data to be predicted/learned from, like '9.1|11.0' or 'weekday|winter|high|heavy'\n")))
#input("Press enter to continue...")#just to use it in single-time windows settings | |
#main() | |
# Fit scikit-learn's GaussianNB and KNeighborsClassifier on the CSV training
# data, holding out the last five rows as the test set.
mydata = pd.read_csv('trainingNear2.csv')
target = mydata["Class"]
#iris = datasets.load_iris()
#data = iris.data[:,:2]
#target = iris.target
# Every column except the last one is a feature. ``DataFrame.ix`` no longer
# exists in current pandas, so positional slicing goes through ``.iloc``.
data = mydata.iloc[:, :-1]
#print(data)
data_train = data[:-5]
target_train = target[:-5]
data_test = data[-5:]
target_test = target[-5:]
print(data_train)
print(target_train)
#The Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(data_train, target_train)
print("Prediction with GaussianNB: %s" % (gnb.predict(data_test)))
#Kn-Neighbors
knn = KNeighborsClassifier(weights='distance', algorithm='auto')
knn.fit(data_train, target_train)
print("Prediction with knn: %s" % knn.predict(data_test))
print(target_test)
data_test = np.array(data_test)
target_test = np.array(target_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Day | Season | Wind | Rain | Class | |
---|---|---|---|---|---|
weekday | spring | none | none | on time | |
weekday | winter | none | slight | on time | |
weekday | winter | none | slight | on time | |
weekday | winter | high | heavy | late | |
saturday | summer | normal | none | on time | |
weekday | autumn | normal | none | very late | |
holiday | summer | high | slight | on time | |
sunday | summer | normal | none | on time | |
weekday | winter | high | heavy | very late | |
weekday | summer | none | slight | on time | |
saturday | spring | high | heavy | cancelled | |
weekday | summer | high | slight | on time | |
saturday | winter | normal | none | late | |
weekday | summer | high | none | on time | |
weekday | winter | normal | heavy | very late | |
saturday | autumn | high | slight | on time | |
weekday | autumn | none | heavy | on time | |
holiday | spring | normal | slight | on time | |
weekday | spring | normal | none | on time | |
weekday | spring | normal | slight | on time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Attribute1 | Attribute2 | Class | |
---|---|---|---|
0.8 | 6.3 | 0 | |
1.4 | 8.1 | 0 | |
2.1 | 7.4 | 0 | |
2.6 | 14.3 | 1 | |
6.8 | 12.6 | 0 | |
8.8 | 9.8 | 1 | |
9.2 | 11.6 | 0 | |
10.8 | 9.6 | 1 | |
11.8 | 9.9 | 1 | |
12.4 | 6.5 | 1 | |
12.8 | 1.1 | 0 | |
14.0 | 19.9 | 0 | |
14.2 | 18.5 | 0 | |
15.6 | 17.4 | 0 | |
15.8 | 12.2 | 0 | |
16.6 | 6.7 | 1 | |
17.4 | 4.5 | 1 | |
18.2 | 6.9 | 1 | |
19.0 | 3.4 | 0 | |
19.6 | 11.1 | 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment