Skip to content

Instantly share code, notes, and snippets.

@Yuri-M-Dias
Created March 9, 2017 12:57
Show Gist options
  • Save Yuri-M-Dias/d7b69fd8bc873cdf6029af5dcb5a8a0f to your computer and use it in GitHub Desktop.
Save Yuri-M-Dias/d7b69fd8bc873cdf6029af5dcb5a8a0f to your computer and use it in GitHub Desktop.
A k-nearest-neighbours ("Nth nearest") and Naive Bayes implementation in Python 3.
#!/usr/bin/env python
import math
import operator
from sklearn import datasets
from sklearn import svm
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
class DataMiningStrategy(object):
    """Duck-typed base class for the data-mining strategies in this file.

    Concrete strategies override learnFromInput; the base implementation
    only signals that the override is missing.
    """

    def learnFromInput(self, elements, compareTo):
        """Run the strategy on the training data for one query.

        elements  -- the training data handed to the concrete strategy
        compareTo -- the sample to predict/learn from

        Raises NotImplementedError when called on the base class.
        """
        message = "Implement the learn from input!"
        raise NotImplementedError(message)
class NNearestImpl(DataMiningStrategy):
    """k-nearest-neighbours strategy over two numeric attributes.

    The first two training columns are the coordinates and the last column
    holds the class labels. By default the 5 nearest neighbours are used.
    """

    def learnFromInput(self, elements, compareTo, k=5):
        """Rank the classes found among the k training points nearest to the query.

        elements  -- training columns; elements[0] and elements[1] supply the
                     two coordinates, elements[-1] the class labels. Each
                     column must offer getElements(), and the class column
                     also getNumberElements().
        compareTo -- the query point as a string such as '9.1|11.0'
        k         -- how many neighbours vote (default 5); when fewer training
                     rows exist, all of them vote

        Prints each voting neighbour, nearest first.

        Returns the distinct class names seen among the voters, most-voted
        first (ties keep nearest-first order), so the first entry is the
        predicted class. The list is empty when there is no training data.

        Raises ValueError if k is smaller than 1 or a coordinate is not numeric.
        """
        if k < 1:
            raise ValueError("k must be at least 1")

        coordinates = compareTo.split("|")
        a1 = float(coordinates[0])
        a2 = float(coordinates[1])

        trainingClass = elements[-1]  # the class column
        firstAttribute = elements[0].getElements()
        secondAttribute = elements[1].getElements()
        labels = trainingClass.getElements()

        distances = []
        for line in range(trainingClass.getNumberElements()):
            b1 = float(firstAttribute[line])
            b2 = float(secondAttribute[line])
            distance = self.calculateDistance(a1, a2, b1, b2)
            distances.append(DistanceHelper(distance, a1, a2, b1, b2, labels[line]))
        distances.sort(key=operator.attrgetter('distance'))

        votes = {}
        possibleelements = []
        # Slicing (rather than indexing 0..k-1) tolerates small training sets.
        for neighbour in distances[:k]:
            print(neighbour)
            className = neighbour.getClassName()
            if className not in votes:
                votes[className] = 0
                possibleelements.append(className)
            votes[className] += 1

        # list.sort is stable, so equally-voted classes stay nearest-first.
        possibleelements.sort(key=lambda name: -votes[name])
        return possibleelements

    def calculateDistance(self, a1, a2, b1, b2):
        """Return the Euclidean distance between points (a1, a2) and (b1, b2).

        Kept as its own method so it can later be replaced by a
        multi-dimensional implementation.
        """
        return math.sqrt((a1 - b1)**2 + (a2 - b2)**2)
class NaiveBayesImpl(DataMiningStrategy):
    """Naive Bayes strategy over categorical training columns.

    The last training column holds the class labels; every column before it
    is an attribute.
    """

    def learnFromInput(self, elements, compareTo):
        """Predict the class of one sample and return its name.

        elements  -- training columns; elements[-1] is the class column and
                     the preceding columns are the attributes. Columns must
                     offer getElements(), getPossibleElements(),
                     getNumberPossibleElements(), getTimesElementAppeared()
                     and calculateEachPossibleElemProb().
        compareTo -- the attribute values of the sample, either as a
                     '|'-separated string such as 'weekday|winter|high|heavy'
                     or as a sequence with one value per attribute column

        Prints the intermediate probabilities and the final prediction.

        Raises ValueError if fewer values than attribute columns are given.
        """
        # A raw string would otherwise be indexed character by character.
        if isinstance(compareTo, str):
            compareTo = compareTo.split("|")

        classColumn = elements[-1]
        attributeColumns = elements[:-1]
        if len(compareTo) < len(attributeColumns):
            raise ValueError(
                "Expected %d attribute values, got %d"
                % (len(attributeColumns), len(compareTo)))

        possibleelements = classColumn.getPossibleElements()
        # P(Ci) for every class, computed once instead of once per class.
        priors = classColumn.calculateEachPossibleElemProb()

        bayesProbability = []
        for element in range(classColumn.getNumberPossibleElements()):
            probability = priors[element]  # P(Ci)
            print(probability)
            # Multiply by P(a_j = v_j | Ci) for every attribute j.
            for column, value in zip(attributeColumns, compareTo):
                probability *= self.calculateConditionalProbability(
                    column, classColumn, str(value), str(possibleelements[element]))
            print(probability, "2")
            bayesProbability.append(probability)

        for x in bayesProbability:
            print(x)

        prediction = possibleelements[bayesProbability.index(max(bayesProbability))]
        print(prediction)
        return prediction

    def calculateConditionalProbability(self, numelem1, numelem2, compare1, compare2):
        """Return P(numelem1 == compare1 | numelem2 == compare2).

        Counts the training rows where both columns hold the given values and
        divides by the number of rows whose class is compare2.

        Raises ValueError if compare2 is not a known value of numelem2.
        """
        compare1 = str(compare1)
        compare2 = str(compare2)
        # Walk all rows of both columns in lockstep, whatever the row count.
        matches = sum(
            1
            for attributeValue, classValue
            in zip(numelem1.getElements(), numelem2.getElements())
            if attributeValue == compare1 and classValue == compare2)
        classIndex = numelem2.getPossibleElements().index(compare2)
        return matches / numelem2.getTimesElementAppeared()[classIndex]
class ElementToCalculate(object):
    """One named column of the training set with value-frequency bookkeeping.

    Every value is stored as a string. The column tracks the values in
    insertion order, the distinct values in order of first appearance, and
    how often each distinct value occurred.
    """

    def __init__(self, name):
        """Create an empty column called name."""
        self.name = name
        self.numberOfPossibleElements = 0
        self.possibleelements = []
        self.numberofelements = 0
        self.elements = []
        # Starts with 50 zeroed counters (kept for compatibility with code
        # that reads the list directly); addElement grows it when needed.
        self.everyTimeElementAppeared = [0] * 50

    def getName(self):
        """Return the column name as a string."""
        return str(self.name)

    def getElements(self):
        """Return all stored values in insertion order."""
        return self.elements

    def getNumberElements(self):
        """Return how many values were added."""
        return int(self.numberofelements)

    def getPossibleElements(self):
        """Return the distinct values in order of first appearance."""
        return self.possibleelements

    def getNumberPossibleElements(self):
        """Return how many distinct values were seen."""
        return int(self.numberOfPossibleElements)

    def getTimesElementAppeared(self):
        """Return the occurrence counters, indexed like getPossibleElements()."""
        return self.everyTimeElementAppeared

    def addElement(self, elementname):
        """Append a value to the column and update the frequency counters.

        The value is converted to a string before it is looked up, so equal
        non-string values are counted as the same distinct value.
        """
        value = str(elementname)
        if value not in self.possibleelements:
            # Unknown value: register it as a new distinct value.
            self.possibleelements.append(value)
            self.numberOfPossibleElements += 1
        position = self.possibleelements.index(value)

        # Make room for counters beyond the pre-allocated slots.
        missing = position + 1 - len(self.everyTimeElementAppeared)
        if missing > 0:
            self.everyTimeElementAppeared.extend([0] * missing)

        self.elements.append(value)
        self.numberofelements += 1
        self.everyTimeElementAppeared[position] += 1

    def calculateEachPossibleElemProb(self):
        """Return the relative frequency of each distinct value.

        The list is indexed like getPossibleElements() and is empty when no
        value has been added yet.
        """
        counts = self.everyTimeElementAppeared[:self.numberOfPossibleElements]
        return [count / self.numberofelements for count in counts]
class DistanceHelper(object):
    """Record of one distance measurement between a query point and a
    training point, used for sorting and printing in the nearest-neighbour
    algorithm."""

    def __init__(self, distance, a1, a2, b1, b2, bClass):
        """Store the distance, both points (a1, a2) and (b1, b2), and the
        class of the second point as a string."""
        super(DistanceHelper, self).__init__()
        self.distance, self.a1, self.a2 = distance, a1, a2
        self.b1, self.b2 = b1, b2
        self.bClass = str(bClass)

    def __repr__(self):
        """Return a one-line description of the measurement."""
        numbers = (self.distance, self.a1, self.a2, self.b1, self.b2)
        prefix = 'Distance: %s from %s, %s and %s, %s and with class: ' % numbers
        return prefix + self.bClass

    def getClassName(self):
        """Return the class of the training point as a string."""
        return str(self.bClass)
#-----Helper functions: print and read from the training set-----
def printAllElements(elements):
    """Print every column of the training set.

    For each column this prints its name and size, each stored value on its
    own line, and then every distinct value with its probability.
    """
    for column in elements:
        print(column.getName(), 'with', column.getNumberElements(), 'elements')
        for value in column.getElements():
            print(value)

        print('And possible elements:')
        chances = column.calculateEachPossibleElemProb()
        names = column.getPossibleElements()
        position = 0
        total = column.getNumberPossibleElements()
        while position < total:
            print(names[position], 'with probability of:', chances[position])
            position += 1
def readFileTokenizer(name):
    """Read '<name>.txt' and return its lines split on the '|' separator.

    name -- path of the file without the '.txt' extension

    Returns a list with one entry per line; each entry is the list of
    '|'-separated fields of that line with the trailing newline removed.
    """
    # The with-block closes the file even if reading fails part-way.
    with open(name + '.txt', 'r') as source:
        return [line.rstrip('\n').split('|') for line in source]
def main():
    """Interactive entry point: load a training file and run one algorithm.

    Asks for the training file name (without '.txt'), builds one
    ElementToCalculate column per header field, asks which algorithm to use
    and finally asks for the sample to classify.

    Exits with status 1 when the training file is empty or the algorithm
    choice is not 0 or 1.
    """
    lines = readFileTokenizer(str(input('Digite o nome do arquivo de treinamento\n')))
    if not lines:
        print("The training file is empty.")
        raise SystemExit(1)

    # The first line names the columns; every following line is one sample.
    header, rows = lines[0], lines[1:]
    elements = [ElementToCalculate(elementsnames) for elementsnames in header]
    for line in rows:
        for x in range(len(elements)):
            elements[x].addElement(line[x])

    try:
        choice = int(input("Type 0 for Nth Nearest, or 1 for Naive Bayes\n"))
    except ValueError:
        choice = None  # non-numeric input is handled like any other bad choice

    if choice == 0:
        dataMiningAlgorithm = NNearestImpl()
    elif choice == 1:
        dataMiningAlgorithm = NaiveBayesImpl()
    else:
        print("You didn't type 0 or 1, you liar! Run it again!")
        # Actually stop here; no algorithm was selected.
        raise SystemExit(1)

    dataMiningAlgorithm.learnFromInput(elements, str(input("Type the data to be predicted/learned from, like '9.1|11.0' or 'weekday|winter|high|heavy'\n")))
    #input("Press enter to continue...")#just to use it in single-time windows settings
#main()
# Cross-check with scikit-learn: train GaussianNB and k-NN on all but the
# last five rows of the CSV and predict the classes of those five rows.
mydata = pd.read_csv('trainingNear2.csv')
target = mydata["Class"]
#iris = datasets.load_iris()
#data = iris.data[:,:2]
#target = iris.target
# Every column but the last is used as a feature (assumes "Class" is the
# last column of the CSV). DataFrame.ix was removed from pandas, so the
# positional .iloc indexer is used instead.
data = mydata.iloc[:, :-1]
#print(data)

# Hold out the last five rows for testing.
data_train = data[:-5]
target_train = target[:-5]
data_test = data[-5:]
target_test = target[-5:]
print(data_train)
print(target_train)

#The Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(data_train, target_train)
print("Prediction with GaussianNB: %s" % (gnb.predict(data_test)))

#Kn-Neighbors
knn = KNeighborsClassifier(weights='distance', algorithm='auto')
knn.fit(data_train, target_train)
print("Prediction with knn: %s" % knn.predict(data_test))

# The expected classes of the held-out rows, for comparison.
print(target_test)
data_test = np.array(data_test)
target_test = np.array(target_test)
Day Season Wind Rain Class
weekday spring none none on time
weekday winter none slight on time
weekday winter none slight on time
weekday winter high heavy late
saturday summer normal none on time
weekday autumn normal none very late
holiday summer high slight on time
sunday summer normal none on time
weekday winter high heavy very late
weekday summer none slight on time
saturday spring high heavy cancelled
weekday summer high slight on time
saturday winter normal none late
weekday summer high none on time
weekday winter normal heavy very late
saturday autumn high slight on time
weekday autumn none heavy on time
holiday spring normal slight on time
weekday spring normal none on time
weekday spring normal slight on time
Attribute1 Attribute2 Class
0.8 6.3 0
1.4 8.1 0
2.1 7.4 0
2.6 14.3 1
6.8 12.6 0
8.8 9.8 1
9.2 11.6 0
10.8 9.6 1
11.8 9.9 1
12.4 6.5 1
12.8 1.1 0
14.0 19.9 0
14.2 18.5 0
15.6 17.4 0
15.8 12.2 0
16.6 6.7 1
17.4 4.5 1
18.2 6.9 1
19.0 3.4 0
19.6 11.1 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment