Skip to content

Instantly share code, notes, and snippets.

@krrrr38
Created January 8, 2012 05:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krrrr38/1577321 to your computer and use it in GitHub Desktop.
Save krrrr38/1577321 to your computer and use it in GitHub Desktop.
twitter data #programming
#!/usr/bin/python
# -*- coding: utf-8 -*-
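'''Fetch the StatusList for the hashtag #programming and save the text to files.
1. Fetch data using "#programming" as the search condition
2. Replace ", . ( )" with spaces
3. Convert to lowercase
#4. Split the text on spaces
#5. If a word starts with #, remove the #
'''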
import twitter
import re
def convert(text):
    """Normalise a tweet's text and return it as UTF-8 encoded bytes.

    The characters ``, . ( )`` are each replaced by a single space and the
    text is lowercased.

    Args:
        text: the tweet text (a unicode string).

    Returns:
        The normalised text encoded as UTF-8.
    """
    # Normalise while the value is still text: lowercasing an already
    # encoded byte string only affects ASCII letters, so non-ASCII
    # uppercase characters would be left untouched.
    text = re.sub(r'[,\.\(\)]', ' ', text)
    text = text.lower()
    # Encode once, at the output boundary.
    return text.encode('utf-8')
def writeFile(searchList, fo):
    """Write the normalised text of every status to ``fo``, one per line.

    Args:
        searchList: sequence of status objects, each exposing a ``text``
            attribute.
        fo: a writable file object. It is closed before this function
            returns, including when a write fails.
    """
    try:
        # Iterate over every status; the earlier index loop started at 1
        # and therefore silently dropped the first result.
        for status in searchList:
            fo.write(convert(status.text) + '\n')
    finally:
        # Callers hand over ownership of the file, so always release it.
        fo.close()
def getTimeLine(LANG="en", SEARCH_WORD="#programming",
                per_page=100, start_page=1, end_page=51):
    """Search Twitter page by page and save each page to its own file.

    For every page number in ``[start_page, end_page)`` the search results
    are passed to ``writeFile`` together with a file named
    ``./tmp/<LANG><page>`` opened for writing.

    Args:
        LANG: language code passed to the search as ``lang`` and used as
            the output file name prefix.
        SEARCH_WORD: the search term.
        per_page: number of results requested per page.
        start_page: first page number to fetch (inclusive).
        end_page: page number at which to stop (exclusive).
    """
    import os

    out_dir = './tmp/'
    # Create the output directory up front so open() does not fail on a
    # fresh checkout.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    api = twitter.Api()
    for page in range(start_page, end_page):
        searchList = api.GetSearch(
            SEARCH_WORD, per_page=per_page, page=page, lang=LANG)
        fileName = out_dir + LANG + str(page)
        # The context manager guarantees the handle is released even if
        # writing raises; closing an already-closed file is harmless.
        with open(fileName, 'w') as f:
            writeFile(searchList, f)
# When executed as a script, run the download with the default settings.
if __name__ == "__main__":
    getTimeLine()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment