# tedwards/service_bayes.py

## service_bayes.py
def service_bayes(aTicket, context_words, word_count, context_tickets, ticket_count):
    '''
    Guess the most likely service type for a ticket from the words in its summary.

    For every word of the summary, Bayes' rule turns the prior P(service) and the
    likelihood P(word | service) into P(service | word); a service's score is the
    average of those per-word probabilities over the whole summary.

    Input
    aTicket: a ticket pulled from trac db stored as a list with the following fields
             id, summary, type, status, service, created_date, lastupdated_date, reporter
             Only the summary (index 1) is read; it is split on whitespace.

    context_words:  a dictionary with the structure
             {service: {'count': number, word: {'count': integer}...}...}
             used to correlate the number of times a word is associated with a service type.
             The service-level 'count' is what each word count is divided by
             (presumably the service's total word count -- confirm against the
             code that builds this dictionary).  Its keys are the candidate services.

    word_count: total number of words counted in all ticket summaries;
             1/word_count is the likelihood given to a word a service has never seen

    context_tickets: a dictionary with the structure {service: {'count': integer}...}
             used to correlate the number of times a ticket is associated with a service type

    ticket_count: total number of tickets counted

    Output
    (top_key, top)
    top_key: service type with the highest score; ties go to the name that sorts
             first.  None when context_words is empty.
    top: score associated with top_key (0.0 when context_words is empty)

    An empty summary carries no word evidence, so the priors are used as the scores.
    '''
    # Sorted so that ties are always resolved the same way (first name wins).
    services = sorted(context_words)
    if not services:
        return (None, 0.0)

    # Prior P(service): share of all tickets filed under that service.  A service
    # with no recorded tickets gets a prior of 0 instead of raising KeyError.
    priors = {}
    for service in services:
        ticket_total = context_tickets.get(service, {}).get('count', 0)
        if ticket_total > 0:
            priors[service] = float(ticket_total) / ticket_count
        else:
            priors[service] = 0.0

    words = aTicket[1].split()
    if not words:
        scores = priors
    else:
        # Likelihood assigned to a word that a service has never seen.
        if word_count:
            smoothing = 1.0 / word_count
        else:
            smoothing = 0.0

        scores = dict.fromkeys(services, 0.0)
        for word in words:
            likelihoods = {}
            for service in services:
                service_words = context_words[service]
                entry = service_words.get(word)
                service_total = service_words.get('count', 0)
                # The isinstance check keeps the reserved service-level 'count'
                # entry from being mistaken for a word record when the summary
                # itself contains the word "count".
                if isinstance(entry, dict) and service_total:
                    likelihoods[service] = float(entry['count']) / service_total
                else:
                    likelihoods[service] = smoothing

            # Evidence P(word), rebuilt for every word from the same smoothed
            # likelihoods used in the numerator, so the per-word posteriors of
            # all services sum to 1.
            evidence = sum(priors[s] * likelihoods[s] for s in services)
            if not evidence:
                # Nothing can be inferred from this word; it contributes 0.
                continue
            for service in services:
                scores[service] += priors[service] * likelihoods[service] / evidence

        for service in services:
            scores[service] /= len(words)

    # max() returns the first maximal item, i.e. the alphabetically first on a tie.
    top_key = max(services, key=scores.get)
    return (top_key, scores[top_key])
	def service_bayes(aTicket, context_words, word_count, context_tickets, ticket_count):
	    '''
	    Guess the most likely service type for a ticket from the words in its summary.

	    For every word of the summary, Bayes' rule turns the prior P(service) and the
	    likelihood P(word | service) into P(service | word); a service's score is the
	    average of those per-word probabilities over the whole summary.

	    Input
	    aTicket: a ticket pulled from trac db stored as a list with the following fields
	             id, summary, type, status, service, created_date, lastupdated_date, reporter
	             Only the summary (index 1) is read; it is split on whitespace.

	    context_words:  a dictionary with the structure
	             {service: {'count': number, word: {'count': integer}...}...}
	             used to correlate the number of times a word is associated with a service type.
	             The service-level 'count' is what each word count is divided by
	             (presumably the service's total word count -- confirm against the
	             code that builds this dictionary).  Its keys are the candidate services.

	    word_count: total number of words counted in all ticket summaries;
	             1/word_count is the likelihood given to a word a service has never seen

	    context_tickets: a dictionary with the structure {service: {'count': integer}...}
	             used to correlate the number of times a ticket is associated with a service type

	    ticket_count: total number of tickets counted

	    Output
	    (top_key, top)
	    top_key: service type with the highest score; ties go to the name that sorts
	             first.  None when context_words is empty.
	    top: score associated with top_key (0.0 when context_words is empty)

	    An empty summary carries no word evidence, so the priors are used as the scores.
	    '''
	    # Sorted so that ties are always resolved the same way (first name wins).
	    services = sorted(context_words)
	    if not services:
	        return (None, 0.0)

	    # Prior P(service): share of all tickets filed under that service.  A service
	    # with no recorded tickets gets a prior of 0 instead of raising KeyError.
	    priors = {}
	    for service in services:
	        ticket_total = context_tickets.get(service, {}).get('count', 0)
	        if ticket_total > 0:
	            priors[service] = float(ticket_total) / ticket_count
	        else:
	            priors[service] = 0.0

	    words = aTicket[1].split()
	    if not words:
	        scores = priors
	    else:
	        # Likelihood assigned to a word that a service has never seen.
	        if word_count:
	            smoothing = 1.0 / word_count
	        else:
	            smoothing = 0.0

	        scores = dict.fromkeys(services, 0.0)
	        for word in words:
	            likelihoods = {}
	            for service in services:
	                service_words = context_words[service]
	                entry = service_words.get(word)
	                service_total = service_words.get('count', 0)
	                # The isinstance check keeps the reserved service-level 'count'
	                # entry from being mistaken for a word record when the summary
	                # itself contains the word "count".
	                if isinstance(entry, dict) and service_total:
	                    likelihoods[service] = float(entry['count']) / service_total
	                else:
	                    likelihoods[service] = smoothing

	            # Evidence P(word), rebuilt for every word from the same smoothed
	            # likelihoods used in the numerator, so the per-word posteriors of
	            # all services sum to 1.
	            evidence = sum(priors[s] * likelihoods[s] for s in services)
	            if not evidence:
	                # Nothing can be inferred from this word; it contributes 0.
	                continue
	            for service in services:
	                scores[service] += priors[service] * likelihoods[service] / evidence

	        for service in services:
	            scores[service] /= len(words)

	    # max() returns the first maximal item, i.e. the alphabetically first on a tie.
	    top_key = max(services, key=scores.get)
	    return (top_key, scores[top_key])