Attempt at Bayesian assessment of tickets stored in Trac
def service_bayes(aTicket, context_words, word_count, context_tickets, ticket_count):
    '''
    Naive Bayes classifier: guess the most likely service type for a ticket
    from the words in its summary.

    Input
        aTicket: a ticket pulled from the Trac db, stored as a list with the fields
            id, summary, type, status, service, created_date, lastupdated_date, reporter
        context_words: a dictionary with the structure
            {service: {word: {'count': int}, ..., 'count': int}, ...}
            correlating the number of times a word is associated with a service
            type; the service-level 'count' key holds the total number of words
            counted for that service
        word_count: total number of words counted in all ticket summaries
        context_tickets: a dictionary with the structure {service: {'count': int}, ...}
            used to correlate the number of times a ticket is associated with a
            service type
        ticket_count: total number of tickets counted
    Output
        (top_key, top)
        top_key: service type with the highest probability
        top: probability associated with top_key
    '''
    # Prior P(service) = tickets for that service / total tickets.
    priors = {}
    for service in context_tickets:
        if context_tickets[service]['count'] > 0:
            priors[service] = float(context_tickets[service]['count']) / ticket_count

    posteriors = {}
    words = aTicket[1].split()  # tokens in the ticket summary
    num_words = len(words)
    for service in priors:
        posteriors[service] = 0.0
        for aWord in words:
            # Likelihood P(word | service), with a crude 1/word_count fallback
            # for words never seen with this service.
            if aWord in context_words[service]:
                likelihood = float(context_words[service][aWord]['count']) / context_words[service]['count']
            else:
                likelihood = 1.0 / word_count
            # Evidence P(word) = sum over all services of P(word | s) * P(s),
            # recomputed from scratch for each word, using the same fallback
            # so the per-word posteriors sum to 1.
            normalizer = 0.0
            for p in priors:
                if aWord in context_words[p]:
                    l_h = float(context_words[p][aWord]['count']) / context_words[p]['count']
                else:
                    l_h = 1.0 / word_count
                normalizer += l_h * priors[p]
            # Per-word posterior P(service | word), summed here and
            # averaged over the summary length below.
            posteriors[service] += (priors[service] * likelihood) / normalizer
        posteriors[service] /= num_words
    # Return the service with the highest averaged posterior.
    top_key = max(posteriors, key=posteriors.get)
    return (top_key, posteriors[top_key])
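
A minimal sketch of how the function might be called. The counts below are hypothetical, invented for illustration rather than pulled from a real Trac database; field names and services are assumptions.

# Hypothetical per-service word counts; the service-level 'count' key is
# the total number of words seen for that service.
context_words = {
    'email':   {'outlook': {'count': 12}, 'login': {'count': 3}, 'count': 40},
    'network': {'vpn': {'count': 9}, 'login': {'count': 5}, 'count': 35},
}
context_tickets = {'email': {'count': 20}, 'network': {'count': 15}}
word_count = 75    # total words across all ticket summaries
ticket_count = 35  # total tickets counted

# Ticket fields: id, summary, type, status, service, created, updated, reporter
aTicket = [101, 'outlook login failure', 'defect', 'new', None, None, None, 'alice']

print(service_bayes(aTicket, context_words, word_count, context_tickets, ticket_count))
# -> ('email', ~0.65): 'outlook' strongly favors the email service, 'login'
#    appears under both services, and 'failure' is unseen everywhere.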