Skip to content

Instantly share code, notes, and snippets.

@wy36101299
Created January 25, 2015 07:50
Show Gist options
  • Save wy36101299/185d95442a1f83d4ee29 to your computer and use it in GitHub Desktop.
Save wy36101299/185d95442a1f83d4ee29 to your computer and use it in GitHub Desktop.
Bayes-classify 貝氏分類器
import math
import numpy as np
# Training data: keyword frequencies per news article.
#
# Keyword category:             P     P      S           S        T      T
# News article       Class    Benz   BMW  Basketball  Running   Phone   App
# --------------------------------------------------------------------------
# C63 launch event     P       15    25       0          5        8      3
# BMW i8               P       35    40       1          3        3      2
# Jeremy Lin           S        5     0      35         50        0      0
# Lakers               S        1     5      32         15        0      0
# Android 5.0          T       10     5       7          0        2     30
# iPhone6              T        5     5       5         15        8     32
lines = [
    ['p', 15, 25, 0, 5, 8, 3],
    ['p', 35, 40, 1, 3, 3, 2],
    ['s', 5, 0, 35, 50, 0, 0],
    ['s', 1, 5, 32, 15, 0, 0],
    ['t', 10, 5, 7, 0, 2, 30],
    ['t', 5, 5, 5, 15, 8, 32],
]

# The first field of every record is its class label.
label = [line[0] for line in lines]

# Number of keyword features carried by each record (everything after the label).
dimension = len(lines[0]) - 1

# Build explicit lists of floats: on Python 3 a bare map() is a lazy iterator,
# and np.array() would wrap the iterators as objects instead of producing a
# 2-D numeric array.
dataSet = np.array([[float(value) for value in line[1:]] for line in lines])
def Bayestrain(classifyRange, data=None):
    """Estimate class priors and Laplace-smoothed keyword likelihoods.

    Args:
        classifyRange: dict mapping a class key to a ``[start, stop]`` pair.
            The pair is applied as a column slice when computing the prior
            and as a row slice when computing the likelihoods.
        data: optional 2-D array-like of keyword frequencies (one row per
            training record, one column per keyword). Defaults to the
            module-level ``dataSet``.

    Returns:
        A tuple ``(classifyProbabilityList, featureProbabilityList)``. Both
        lists follow the iteration order of ``classifyRange``; the second
        holds one list of per-keyword probabilities for each class.
    """
    if data is None:
        data = dataSet
    # Force float so the divisions below never truncate on integer input.
    data = np.asarray(data, dtype=float)
    featureCount = data.shape[1]
    alltrainSample = data.sum()

    # P(class) = sum of the frequencies selected for the class
    #            / sum of all frequencies in the training set.
    # NOTE(review): this slices COLUMNS with the range while the likelihood
    # below slices ROWS with the same range; the two only line up when rows
    # and columns are grouped identically. Confirm this is intended.
    classifyProbabilityList = [
        data[:, start:stop].sum() / alltrainSample
        for start, stop in classifyRange.values()
    ]

    # P(keyword | class) = (frequency of the keyword within the class + 1)
    #                      / (total frequency within the class + keyword count)
    featureProbabilityList = []
    # .items() instead of .iteritems(): the latter does not exist on Python 3.
    for key, (start, stop) in classifyRange.items():
        classRows = data[start:stop]
        # The denominator is the same for every keyword, so compute it once.
        denominator = classRows.sum() + featureCount
        featureProbabilityList.append(
            [(classRows[:, a].sum() + 1) / denominator for a in range(featureCount)]
        )
    return classifyProbabilityList, featureProbabilityList
def Bayespredict(classifyProbabilityList, featureProbabilityList, classifyRange, predictList):
    """Return the class key with the highest log-posterior score.

    Args:
        classifyProbabilityList: class priors, in the iteration order of
            ``classifyRange``.
        featureProbabilityList: per-class lists of keyword probabilities, in
            the same order as the priors.
        classifyRange: dict whose keys are the class identifiers; only the
            keys (and their order) are used here.
        predictList: keyword frequencies of the record to classify.

    Returns:
        The key of ``classifyRange`` whose score is largest; on ties the
        first such key in iteration order wins.
    """
    # Iterating the dict directly yields its keys and works on both
    # Python 2 and Python 3 (dict.iteritems() is gone in Python 3).
    classKeys = list(classifyRange)

    compareList = []
    for prior, fList in zip(classifyProbabilityList, featureProbabilityList):
        # Work in log space so the product of many small probabilities
        # does not underflow.
        score = math.log10(prior)
        for frequency, probability in zip(predictList, fList):
            score += frequency * math.log10(probability)
        compareList.append(score)

    return classKeys[compareList.index(max(compareList))]
# 2 and 4 are the thresholds that split the keywords into categories:
# keyword category p -> [:2], s -> [2:4], t -> [4:dimension].
classifyRange = {'p': [0, 2], 's': [2, 4], 't': [4, 6]}
classifyProbabilityList, featureProbabilityList = Bayestrain(classifyRange)

# classifyProbabilityList and featureProbabilityList are the training results,
# classifyRange holds the thresholds chosen above,
# predictList is the feature vector to classify.
predictList = [10, 2, 50, 56, 8, 5]
predict = Bayespredict(classifyProbabilityList, featureProbabilityList, classifyRange, predictList)

# A bare expression is only echoed by an interactive session; print it so the
# prediction is also visible when the file is run as a script.
print(predict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment