Created
February 1, 2013 03:49
Revisions
-
tedwards created this gist
Feb 1, 2013. There are no files selected for viewing
#!/usr/bin/env python
# Gist page notice and diff header carried over from the captured page
# (not program text):
#   This file contains bidirectional Unicode text that may be interpreted or
#   compiled differently than what appears below. To review, open the file in
#   an editor that reveals hidden Unicode characters. Learn more about
#   bidirectional Unicode characters
#   Original file line number Diff line number Diff line change
#   @@ -0,0 +1,51 @@
"""Train an NLTK Naive Bayes classifier that maps ticket-summary words to the
ticket's ``service`` custom field, reading closed tickets from a SQLite file.

Usage: script.py SQLITE_DB_FILE
"""
import sys
import time
import smtplib
import datetime
import pickle
import random

import nltk
from nltk.tokenize import word_tokenize

try:
    from pysqlite2 import dbapi2 as sqlite
except ImportError:
    # pysqlite2 is the old standalone binding; the standard library's sqlite3
    # module exposes the same DB-API 2.0 interface.
    import sqlite3 as sqlite

TEST = True
today = datetime.datetime.now()


class Collector(object):
    """Load closed tickets (joined with their 'service' custom field).

    Attributes:
        sql: the SELECT statement that is executed.
        db: the open SQLite connection.
        cursor: the cursor the query was executed on.
        tickets: list of result rows; column order is documented below.
    """

    # Column positions in each row of ``self.tickets``.
    # 0 id  1 summary  2 type  3 status  4 service
    # 5 created  6 last_updated  7 reporter
    SUMMARY_COL = 1
    SERVICE_COL = 4

    def __init__(self, dbfile):
        """Connect to ``dbfile``, run the query and cache all rows.

        Args:
            dbfile: path of the SQLite database file to open.
        """
        self.sql = '''SELECT id,
                             summary,
                             t.type AS type,
                             t.status AS status,
                             c.value AS service,
                             date(time/1000000, 'unixepoch') AS created,
                             date(t.changetime/1000000, 'unixepoch') AS last_updated,
                             reporter
                      FROM ticket t, ticket_custom c
                      WHERE status='closed'
                        AND c.ticket = t.id
                        AND c.name = 'service' '''
        self.db = sqlite.connect(dbfile)
        self.cursor = self.db.cursor()
        self.cursor.execute(self.sql)
        self.tickets = self.cursor.fetchall()

    def close(self):
        """Release the cursor and the database connection."""
        self.cursor.close()
        self.db.close()

    def service_type_features(self):
        """Return a feature set mapping ticket summaries to service types.

        Returns:
            A list of ``(features, service)`` pairs. One pair is produced per
            token of each ticket summary (not one per ticket); ``features`` is
            a single-entry dict ``{'contains-word(<token>)': True}``.
        """
        featureset = []
        for ticket in self.tickets:
            service = ticket[self.SERVICE_COL]
            # A NULL summary comes back as None, which word_tokenize cannot
            # handle; treat it as an empty summary (no tokens).
            summary = ticket[self.SUMMARY_COL] or ''
            for word in word_tokenize(summary):
                featureset.append(({'contains-word(%s)' % word: True}, service))
        return featureset


def main(argv=None):
    """Train the classifier and report accuracy and top features.

    Args:
        argv: argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            SQLite database path.

    Returns:
        Process exit status: 0 on success, 1 if no training data was found,
        2 on a usage error.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        sys.stderr.write('usage: %s SQLITE_DB_FILE\n' % argv[0])
        return 2

    collector = Collector(argv[1])
    try:
        # Build the feature set once; it is used for both training and scoring.
        featureset = collector.service_type_features()
    finally:
        collector.close()

    if not featureset:
        sys.stderr.write('no features extracted; nothing to train on\n')
        return 1

    classifier = nltk.NaiveBayesClassifier.train(featureset)
    # NOTE: accuracy is measured on the training data itself, so it is an
    # optimistic figure rather than a held-out estimate.
    print(nltk.classify.accuracy(classifier, featureset))
    # show_most_informative_features() prints its own table and returns None,
    # so it must not be wrapped in print.
    classifier.show_most_informative_features(5)
    return 0


if __name__ == '__main__':
    sys.exit(main())