Skip to content

Instantly share code, notes, and snippets.

@prongs
Created April 6, 2013 11:32
Show Gist options
  • Save prongs/5325805 to your computer and use it in GitHub Desktop.
Save prongs/5325805 to your computer and use it in GitHub Desktop.
twitter challenge again
import collections
# User labels seen by the most recent train() call; train() rebinds this and
# predict() reads it.
users = []


def train(filename="trainingdata.txt"):
    """Build per-word, per-user probabilities from a labelled tweet file.

    The first line of the file is skipped. Every following line is split at
    its first space into a user label and the tweet text; the text is
    lower-cased and split on whitespace into words. Lines that contain no
    space at all (for example blank lines) are skipped instead of aborting
    the whole run.

    For every word seen in training and every user, the stored value is

        (occurrences of word in that user's tweets + 1)
        / (occurrences of word over all users + number of users)

    The +1 means no stored value is ever zero, even for a user who never
    used the word.

    Side effect: rebinds the module-level ``users`` to a list of all user
    labels found in the file.

    Args:
        filename: Path of the training file.

    Returns:
        A dict mapping word -> dict mapping user -> float.
    """
    global users
    word_counts_by_user = {}
    # The context manager closes the file even if a line fails to parse.
    with open(filename) as handle:
        next(handle, None)  # skip the first line
        for line in handle:
            user, separator, tweet = line.partition(' ')
            if not separator:
                # No "user text" structure on this line; nothing to learn.
                continue
            counts = word_counts_by_user.setdefault(user, collections.Counter())
            # update() adds in place; ``+=`` would build a new Counter per
            # line on Python 2.7.
            counts.update(tweet.lower().split())

    # Snapshot as a list so later changes to the dict cannot leak through a
    # live keys view on Python 3.
    users = list(word_counts_by_user)

    totals = collections.Counter()
    for counts in word_counts_by_user.values():
        totals.update(counts)

    user_count = len(word_counts_by_user)
    training_probabilities = {}
    for word, total in totals.items():
        denominator = float(total + user_count)
        # Counter lookups return 0 for words a user never wrote.
        training_probabilities[word] = dict(
            (user, (counts[word] + 1) / denominator)
            for user, counts in word_counts_by_user.items()
        )
    return training_probabilities
def predict(tweet, training_probabilities):
    """Return the user whose training words best match ``tweet``.

    The tweet is lower-cased and split on whitespace. Words absent from
    ``training_probabilities`` are ignored. Each user in the module-level
    ``users`` is scored by combining that user's probability for every
    remaining word, and the highest-scoring user is returned.

    Scores are accumulated as sums of logarithms instead of a running
    product: a product of many values below 1 underflows to 0.0 on long
    inputs, which makes every user tie and the result arbitrary.

    Args:
        tweet: Text to classify.
        training_probabilities: dict word -> dict user -> probability, in
            the shape returned by ``train``.

    Returns:
        The best-scoring user label. Ties go to the user that comes last in
        ``users``; if ``users`` is empty, the empty string is returned.
    """
    import math

    known_words = [
        word for word in tweet.lower().split()
        if word in training_probabilities
    ]

    def log_score(user):
        logs = []
        for word in known_words:
            probability = training_probabilities[word][user]
            if probability <= 0:
                # A zero factor would zero the whole product; log-space
                # equivalent is negative infinity.
                return float('-inf')
            logs.append(math.log(probability))
        # fsum avoids order-dependent rounding, so equal sets of factors
        # produce exactly equal scores.
        return math.fsum(logs)

    best_user, best_score = '', float('-inf')
    for user in users:
        score = log_score(user)
        # ``>=`` keeps the rule that the last of several tied users wins.
        if score >= best_score:
            best_user, best_score = user, score
    return best_user
if __name__ == '__main__':
    # Script entry point: train on the default training file, then read a
    # count N from standard input followed by N tweets (one per line) and
    # print the predicted user for each tweet.

    # raw_input only exists on Python 2; Python 3's input() has the same
    # behaviour. Python 2's input() evaluates what it reads, so it is only
    # used as the fallback when raw_input is missing.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    training_probabilities = train()
    for _ in range(int(read_line())):
        # Parenthesised single-argument print works on Python 2 and 3.
        print(predict(read_line(), training_probabilities))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment