# Source: GitHub gist randcode-generator/1283ae20ad8313121be5f0daa607174c
# Author: @randcode-generator
# Last active July 27, 2017 00:04
#
# (Page chrome from the gist web view -- "Skip to content", "Instantly share
# code, notes, and snippets.", "Show Gist options", "Save ... to your computer
# and use it in GitHub Desktop." -- is not part of the program.)
import math
import operator
from collections import Counter

import numpy as np
# Toy training data: nine samples with four numeric features each, three
# samples per class.  The row order must line up with ``classTypes`` below.
# NOTE(review): the columns are presumably the usual Iris measurements (sepal
# length, sepal width, petal length, petal width) -- the file does not say so.
trainingSet = np.array([
    # rows 0-2: Iris-setosa
    [5.1, 3.5, 1.4, 0.2],
    [4.9, 3.0, 1.4, 0.2],
    [4.7, 3.2, 1.3, 0.2],
    # rows 3-5: Iris-versicolor
    [7.0, 3.2, 4.7, 1.4],
    [6.4, 3.2, 4.5, 1.5],
    [6.9, 3.1, 4.9, 1.5],
    # rows 6-8: Iris-virginica
    [6.3, 3.3, 6.0, 2.5],
    [5.8, 2.7, 5.1, 1.9],
    [7.1, 3.0, 5.9, 2.1],
])

# One label per row of ``trainingSet``, in the same order.
classTypes = np.array(
    ['Iris-setosa'] * 3
    + ['Iris-versicolor'] * 3
    + ['Iris-virginica'] * 3
)

# Samples to classify: a list of feature rows (a single row here).
testVals = [[6.1, 3.0, 4.6, 1.4]]
def organizeData(trainingArr, classTypes):
    """Group the training rows by their class label.

    Args:
        trainingArr: Indexable collection of feature rows (2-D NumPy array or
            list of lists); row ``i`` belongs to ``classTypes[i]``.
        classTypes: Sequence of hashable class labels (NumPy array or list).

    Returns:
        dict mapping each label to a 2-D NumPy array holding that label's
        rows, in their original order.  Labels appear in first-seen order.

    Raises:
        IndexError: If ``trainingArr`` has fewer rows than ``classTypes`` has
            labels.
    """
    # Collect rows in plain lists first; calling np.append inside the loop
    # would copy the whole per-class array on every added row.
    rows_by_label = {}
    for i, label in enumerate(classTypes):
        rows_by_label.setdefault(label, []).append(trainingArr[i])

    grouped = {}
    for label, rows in rows_by_label.items():
        grouped[label] = np.array(rows)
    return grouped
def mean(arr):
    """Return the arithmetic mean of the values in *arr*.

    Args:
        arr: Iterable of numbers (list, tuple, generator or 1-D NumPy array).
            It is consumed in a single pass, so it does not need ``len()``.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ValueError: If *arr* yields no values.
    """
    total = 0.0  # float accumulator keeps the division below a true division
    count = 0
    for value in arr:
        total += value
        count += 1
    if count == 0:
        raise ValueError("mean() requires at least one value")
    return total / count
def standardDeviation(arr, avg):
    """Return the population standard deviation of *arr* around *avg*.

    The squared deviations are divided by ``N`` (not ``N - 1``), i.e. this is
    the population form rather than the sample form.

    Args:
        arr: Iterable of numbers (list, tuple, generator or 1-D NumPy array).
        avg: The centre to measure deviations from, normally ``mean(arr)``.

    Returns:
        ``sqrt(sum((v - avg) ** 2) / N)`` as a float.

    Raises:
        ValueError: If *arr* yields no values.
    """
    squared_total = 0.0
    count = 0
    for value in arr:
        squared_total += (value - avg) ** 2
        count += 1
    if count == 0:
        raise ValueError("standardDeviation() requires at least one value")
    # A single division rounds once, unlike multiplying by a rounded 1.0/count.
    return math.sqrt(squared_total / count)
def gaussianNB(val, avg, std, min_std=1e-9):
    """Evaluate the normal (Gaussian) probability density at *val*.

    Computes ``exp(-(val - avg)^2 / (2 std^2)) / (sqrt(2 pi) * std)``.

    Args:
        val: Point at which to evaluate the density.
        avg: Mean of the distribution.
        std: Standard deviation of the distribution; must not be negative.
        min_std: Floor applied to *std*.  A feature that is constant within a
            class has a standard deviation of zero, which would otherwise
            divide by zero.  Results only change for ``std < min_std``.

    Returns:
        The density as a float.  It underflows to ``0.0`` when *val* is many
        standard deviations away from *avg*.

    Raises:
        ValueError: If *std* is negative.
    """
    if std < 0:
        raise ValueError("std must be non-negative, got %r" % (std,))
    spread = max(std, min_std)
    # The 2.0 literal forces true division even for all-integer inputs on
    # Python 2, where ``/`` between ints would floor the exponent.
    exponent = -((val - avg) ** 2) / (2.0 * spread ** 2)
    return math.exp(exponent) / (math.sqrt(2 * math.pi) * spread)
def classProbability(arr):
    """Return the relative frequency (prior probability) of each label.

    Args:
        arr: Iterable of hashable class labels, one per training sample.

    Returns:
        dict mapping each distinct label to ``occurrences / total`` as a
        float, in first-seen order.  An empty input yields an empty dict.
    """
    counts = Counter(arr)
    # float() keeps this a true division on Python 2 as well.
    total = float(sum(counts.values()))
    return {label: n / total for label, n in counts.items()}
def getStats(data):
    """Compute per-class, per-feature (mean, standard deviation) pairs.

    Args:
        data: dict mapping a class label to a 2-D NumPy array whose rows are
            samples and whose columns are features (the shape produced by
            ``organizeData``).

    Returns:
        dict mapping each class label to a list with one
        ``(mean, standard_deviation)`` tuple per feature column, in column
        order.
    """
    stats = {}
    for label, samples in data.items():
        per_feature = []
        for col in range(samples.shape[1]):
            column = samples[:, col]
            avg = mean(column)
            # The deviation is measured around the mean computed just above.
            per_feature.append((avg, standardDeviation(column, avg)))
        stats[label] = per_feature
    return stats
def calculateGaussianNB(testVals, stats):
    """Evaluate the per-feature Gaussian likelihoods of the first test sample.

    Only ``testVals[0]`` is read; any further rows are ignored.

    Args:
        testVals: Sequence of feature rows; the first row is the sample to
            score.
        stats: dict mapping a class label to a list of
            ``(mean, standard_deviation)`` tuples, one per feature.

    Returns:
        dict mapping each class label to a list of likelihoods, one per
        feature of the sample, in feature order.
    """
    sample = testVals[0]
    likelihoods = {}
    for label in stats:
        feature_stats = stats[label]
        likelihoods[label] = [
            gaussianNB(value, feature_stats[i][0], feature_stats[i][1])
            for i, value in enumerate(sample)
        ]
    return likelihoods
def posteriorNumerator(classProbs, gaussianNBs):
    """Rank the classes by the numerator of the naive Bayes posterior.

    For each class the prior is multiplied by the product of its per-feature
    likelihoods.  The shared denominator (the evidence) is not computed, so
    the scores are unnormalised and only their ordering is meaningful.

    Args:
        classProbs: dict mapping a class label to its prior probability.
        gaussianNBs: dict mapping a class label to an iterable of per-feature
            likelihoods.  It must contain every label in *classProbs*.

    Returns:
        List of ``(label, score)`` tuples sorted by score, highest first.
        Equal scores keep the iteration order of *classProbs*.

    Raises:
        KeyError: If a label in *classProbs* is missing from *gaussianNBs*.
    """
    scores = []
    for label, prior in classProbs.items():
        likelihood = 1.0
        for g in gaussianNBs[label]:
            likelihood *= g
        scores.append((label, prior * likelihood))
    # Trace of the unsorted scores, as the original script printed it.  The
    # call form with one argument prints the same on Python 2 and Python 3.
    print(scores)
    scores.sort(key=operator.itemgetter(1), reverse=True)
    return scores
# Driver: fit the per-class Gaussians on the training set, score the test
# sample against every class, and print the most probable label.
data = organizeData(trainingSet, classTypes)
stats = getStats(data)
classProbs = classProbability(classTypes)
gaussianNBs = calculateGaussianNB(testVals, stats)
numerator = posteriorNumerator(classProbs, gaussianNBs)
# posteriorNumerator returns (label, score) pairs sorted best-first, so
# [0][0] is the winning label.  The call form of print works on Python 2 and 3.
print(numerator[0][0])
# (End of gist. Page footer from the web view: "Sign up for free to join this
# conversation on GitHub. Already have an account? Sign in to comment".)