Skip to content

Instantly share code, notes, and snippets.

@tedwards
Created February 1, 2013 03:49

Revisions

  1. tedwards created this gist Feb 1, 2013.
    51 changes: 51 additions & 0 deletions nltk_service_bayes.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,51 @@
    #!/usr/bin/env python

    import sys
    import time
    import smtplib
    import datetime
    import pickle
    import random

    import nltk
    from pysqlite2 import dbapi2 as sqlite
    from nltk.tokenize import word_tokenize

    # Flag that presumably switches the script into a test mode; nothing in
    # the visible code reads it -- TODO confirm before relying on or removing it.
    TEST = True

    # Naive local timestamp captured once, when the module is loaded.
    # Not referenced by the visible code.
    today = datetime.datetime.now()

    class Collector(object):
        '''Load closed tickets from a SQLite database and turn them into
        training data for a classifier.

        The query in ``self.sql`` is executed once, at construction time, and
        every row it returns is kept in ``self.tickets``.  Each row is laid
        out as:

            0 id, 1 summary, 2 type, 3 status, 4 service,
            5 created, 6 last_updated, 7 reporter

        ``created`` and ``last_updated`` are produced by SQLite's ``date()``
        after dividing the stored values by 1000000, i.e. the query treats
        the stored timestamps as microseconds since the Unix epoch.
        '''

        # Row positions of the two columns used to build features.
        _SUMMARY = 1
        _SERVICE = 4

        def __init__(self, dbfile):
            '''Connect to ``dbfile`` and fetch every matching ticket.

            dbfile -- database path handed to ``sqlite.connect``.

            The connection and cursor stay open on ``self.db`` and
            ``self.cursor``; call ``close()`` once they are no longer needed.
            '''
            self.sql = '''SELECT id , summary, t.type AS type,
    t.status as status, c.value as service,
    date(time/1000000, 'unixepoch') AS created,
    date(t.changetime/1000000, 'unixepoch') as last_updated ,
    reporter
    FROM ticket t, ticket_custom c
    WHERE status='closed'
    AND c.ticket = t.id AND c.name = 'service'
    '''
            self.db = sqlite.connect(dbfile)
            self.cursor = self.db.cursor()

            self.cursor.execute(self.sql)
            self.tickets = self.cursor.fetchall()

        def close(self):
            '''Release the cursor and the database connection.

            ``self.tickets`` is a plain list and remains usable afterwards.
            '''
            self.cursor.close()
            self.db.close()

        def service_type_features(self, tokenize=None):
            '''Return a feature set mapping ticket summaries to service types.

            tokenize -- optional callable turning a summary string into an
                        iterable of tokens; defaults to ``word_tokenize``.

            The result is a list of ``(features, service)`` pairs with one
            pair per token, where ``features`` is the single-entry dict
            ``{'contains-word(<token>)': True}``.  Tickets whose summary is
            NULL or empty contribute nothing.
            '''
            if tokenize is None:
                # Resolved lazily so a caller-supplied tokenizer never
                # requires the default one to be available.
                tokenize = word_tokenize

            featureset = []
            for ticket in self.tickets:
                summary = ticket[self._SUMMARY]
                if not summary:
                    # A NULL summary arrives as None and cannot be tokenized.
                    continue
                service = ticket[self._SERVICE]
                for word in tokenize(summary):
                    featureset.append(({'contains-word(%s)' % word: True}, service))
            return featureset

    if __name__ == '__main__':
        # Script entry point: train a Naive Bayes classifier on the ticket
        # features and report its accuracy and most informative features.
        #
        # The database path is taken from the command line; the name
        # previously used here (`sqlitedb`) was never defined, so the script
        # could not get past this point.
        if len(sys.argv) != 2:
            sys.exit('usage: %s SQLITE_DB_FILE' % sys.argv[0])

        collector = Collector(sys.argv[1])

        # Build the feature set once and reuse it for training and scoring.
        features = collector.service_type_features()
        classifier = nltk.NaiveBayesClassifier.train(features)

        # NOTE(review): accuracy is measured on the very data the classifier
        # was trained on, so it is an optimistic figure; hold out part of
        # `features` for an honest estimate.
        print(nltk.classify.accuracy(classifier, features))

        # show_most_informative_features() prints its own table and returns
        # None, so wrapping it in print would emit a stray "None".
        classifier.show_most_informative_features(5)