Skip to content

Instantly share code, notes, and snippets.

@esehara
Created November 30, 2011 17:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save esehara/1409831 to your computer and use it in GitHub Desktop.
Save esehara/1409831 to your computer and use it in GitHub Desktop.
# -*- coding:utf-8 -*-
import MeCab
import json
import urllib2
import re
import time
# --- Define
# Module-level state shared by MecabRead() and main().
# MeCab tagger constructed with the "mecabrc" argument string.
mecab = MeCab.Tagger("mecabrc")
# Word -> occurrence count.  main() seeds it from the counts file and writes
# it back; MecabRead() is the place where counts are incremented.
words = {}
# UTF-8 encoded texts appended by MecabRead() and written out by main().
result = []
# --- Read Page
def MecabRead(data):
    """Collect non-retweet texts from a search response and count their words.

    For every entry in ``data[u'results']`` whose text does not start with
    ``RT``, the UTF-8 encoded text is appended to the module-level ``result``
    list and tokenized with the module-level ``mecab`` tagger.  Every token
    whose ``posid`` lies in the inclusive range 36..67 is tallied in the
    module-level ``words`` dict (surface form -> count).

    Args:
        data: Decoded JSON response.  It must provide a ``u'results'``
            sequence of dicts, each carrying a ``u'text'`` unicode string.

    Returns:
        None.  The function works by mutating ``result`` and ``words``.
    """
    for page in data[u'results']:
        # Encode once and keep the bytes in a local for the whole walk below.
        # NOTE(review): the MeCab binding is commonly reported to require the
        # parsed string to stay referenced while nodes are in use - confirm.
        text = page[u'text'].encode('utf-8')

        # Texts starting with "RT" are skipped entirely: neither stored nor
        # counted.
        if text.startswith('RT'):
            continue
        result.append(text)

        # The previous code left `node` at None, so the counting loop below
        # never executed; the text has to be tokenized first.
        node = mecab.parseToNode(text)
        while node:
            # NOTE(review): 36..67 presumably selects the noun categories of
            # the IPADIC pos-id table - verify against the installed
            # dictionary.
            if 36 <= node.posid <= 67:
                word = node.surface
                words[word] = words.get(word, 0) + 1
            node = node.next
def main():
    """Fetch search result pages, update the word tally and persist it.

    Steps:
      1. Seed the module-level ``words`` dict from ``file_path``, which holds
         one ``"<word> <count>"`` pair per line.
      2. Download result pages 1..14 of the search query and feed each decoded
         JSON document to ``MecabRead``.
      3. Rewrite ``file_path`` with all counts in ascending count order.
      4. Append every text collected in the module-level ``result`` list to
         ``result_file_path``, one per line.

    Raises:
        IOError: if ``file_path`` cannot be opened for reading or either
            output file cannot be opened for writing.
        urllib2.URLError: if a page cannot be fetched.
    """
    file_path = "FIXME"
    result_file_path = "FIXME"

    # Read the saved counts up front and close the handle right away instead
    # of leaving it to the garbage collector.
    with open(file_path) as counts_file:
        saved_lines = counts_file.read().split("\n")
    for line in saved_lines:
        try:
            key, val = line.split(" ")
            words[key] = int(val)
        except ValueError:
            # Deliberate best-effort: skip lines that are not exactly
            # "<word> <integer>", such as the empty string left after the
            # final newline.
            continue

    for p in range(1, 15):
        response = urllib2.urlopen("http://search.twitter.com/search.json?q=%E3%82%BB%E3%83%83%E3%82%AF%E3%82%B9&callback=?&page=" + str(p))
        try:
            data = json.load(response)
        finally:
            # Release the connection even when the payload is not valid JSON.
            response.close()
        MecabRead(data)

    # `with` guarantees both output files are flushed and closed even if a
    # write fails part-way through.
    with open(file_path, "w") as counts_file:
        for word, count in sorted(words.items(), key=lambda x: x[1]):
            counts_file.write("%s %i\n" % (word, count))

    with open(result_file_path, "a") as texts_file:
        for s in result:
            texts_file.write(s + "\n")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment