Skip to content

Instantly share code, notes, and snippets.

@prongs
Created April 6, 2013 11:15
Show Gist options
  • Save prongs/5325771 to your computer and use it in GitHub Desktop.
Save prongs/5325771 to your computer and use it in GitHub Desktop.
twitter challenge
import re
from collections import *
from itertools import *
class Prob(object):
    """Per-user probability table supporting element-wise multiplication.

    ``probs`` maps each user to a number. The class attribute ``users``
    lists the users that a default-constructed instance covers; it is
    reassigned from outside the class (``train`` sets it).
    """

    # Users covered by a default-constructed instance.
    users = []

    def __init__(self, probs=None):
        """Wrap ``probs``, or start from a neutral 1.0 for every known user.

        A mapping passed in is stored as-is, not copied.
        """
        if probs is None:
            probs = {user: 1.0 for user in self.users}
        self.probs = probs

    def __mul__(self, p):
        """Return a new ``Prob`` with each user's value multiplied by ``p``'s.

        Only the users present in ``self`` appear in the result; a user
        missing from ``p.probs`` raises ``KeyError``. An operand without a
        ``probs`` attribute yields ``NotImplemented`` so that Python raises
        its usual ``TypeError`` for unsupported operands.
        """
        other = getattr(p, 'probs', None)
        if other is None:
            return NotImplemented
        return Prob({user: value * other[user]
                     for user, value in self.probs.items()})

    def __str__(self):
        """Return the string form of the underlying mapping."""
        return str(self.probs)

    def __repr__(self):
        """Return a ``Prob(...)`` expression showing the underlying mapping."""
        return "Prob(%s)" % (repr(self.probs))
def train(filename="trainingdata.txt"):
    """Build per-word, per-user probabilities from a training file.

    The first line of the file is read and discarded. Every later line is
    expected to look like ``<user> <tweet>``: the text before the first
    space is the user, the rest is the tweet. Tweets are lower-cased and
    split on whitespace, and every token is counted (no stop-word
    filtering). Lines that contain no space are skipped.

    As a side effect, ``Prob.users`` is set to the list of users seen.

    Args:
        filename: Path of the training file.

    Returns:
        A dict mapping each word to a ``Prob`` whose value for a user is
        ``(word count for that user + 1) / (word count overall + number
        of users)``.
    """
    d = defaultdict(Counter)
    with open(filename) as f:
        # The first line is not a sample (presumably a line count, going by
        # the original local name ``num_lines`` -- verify against the data).
        f.readline()
        for line in f:
            user, sep, tweet = line.partition(' ')
            if not sep:
                # Blank or malformed line: skip it rather than crash.
                continue
            d[user].update(tweet.lower().split())

    # Snapshot as a list so later changes to ``d`` cannot affect it.
    Prob.users = list(d)

    totals = Counter()
    for counts in d.values():
        totals.update(counts)

    num_users = len(d)
    probs = {}
    for word, total in totals.items():
        denominator = float(total + num_users)
        probs[word] = Prob({user: (counts[word] + 1) / denominator
                            for user, counts in d.items()})
    return probs
def predict(tweet, train_probs):
    """Guess which user wrote ``tweet``.

    Args:
        tweet: The tweet text.
        train_probs: Dict mapping a lower-case word to a ``Prob`` of
            per-user values, as returned by ``train``.

    Returns:
        ``'google'`` if the lower-cased tweet contains ``'google'``.
        Otherwise the user in ``Prob.users`` with the highest product of
        per-word values over the tweet's known words; ties go to the user
        that comes last in ``Prob.users``. Returns ``''`` when
        ``Prob.users`` is empty.
    """
    text = tweet.lower()
    # Hard-coded shortcut: any mention of 'google' is attributed to 'google'.
    if 'google' in text:
        return 'google'

    prob = Prob()
    for word in text.split():
        if word in train_probs:
            # NOTE: the source had an unfinished "characteristic word"
            # early-return check here (an ``if`` with no colon or body).
            # It is dropped; every known word is simply multiplied in.
            prob *= train_probs[word]

    mx, mxk = 0, ''
    for key in Prob.users:
        # ``>=`` means a later user replaces an earlier one on equal score.
        if prob.probs[key] >= mx:
            mxk = key
            mx = prob.probs[key]
    return mxk
if __name__ == '__main__':
    # Script entry: train on the default file, then read a count N from
    # stdin followed by N tweets, printing one predicted user per tweet.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    probs = train()
    for _ in range(int(read_line())):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(predict(read_line(), probs))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment