Skip to content

Instantly share code, notes, and snippets.

@lekevicius
Created March 14, 2013 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lekevicius/5161594 to your computer and use it in GitHub Desktop.
Save lekevicius/5161594 to your computer and use it in GitHub Desktop.
Output word frequencies.
from pattern.vector import *
import glob
from string import *
import operator
import json
# Words whose per-document term frequency is reported.
words = [
    'sir', 'thank', 'love', 'god', 'life', 'night', 'shit', 'boy', 'girl',
    'fuck', 'car', 'money', 'father', 'mother', 'hell', 'son', 'kill',
    'dead', 'call', 'friend', 'stay', 'leave', 'baby', 'home', 'world',
]
documents = []  # not used below; retained from the original script

# Load the previously saved corpus. Each document's name is used as the
# "year" label in the printed output.
corpus = Corpus.load('years.corpus')

# word -> {year: 0} table covering 1962..2012 inclusive. Each word gets its
# own dict (dict.fromkeys builds a fresh one per call). Only the
# commented-out JSON output path below would read this table.
results = {}
for word in words:
    results[word] = dict.fromkeys(range(1962, 2013), 0)

# Emit one "<document name> - <word> - <tf>" line per (document, word) pair.
for document in corpus.documents:
    year = document.name
    for word in words:
        strength = document.tf(word)
        # NOTE(review): `results` is keyed by int years, while `year` is
        # concatenated with str below, so it is presumably a string --
        # confirm (and convert) before re-enabling the next line.
        # results[word][year] = strength * 1000
        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(year + ' - ' + word + ' - ' + str(strength))

# Alternative output: dump the whole table as JSON instead of text lines.
# output = json.dumps(results)
# print output
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment