Created
August 16, 2010 17:20
-
-
Save vim13/527326 to your computer and use it in GitHub Desktop.
過去のtweetsからマルコフ連鎖を用いたセンテンスを生成しtweet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/lib/python | |
#vim:fileencoding=utf-8 | |
import re | |
import twitter | |
def myApi(consumer_key, consumer_secret, access_token, access_token_secret):
    """Build a ``twitter.Api`` client from four OAuth credential strings.

    The values are forwarded positionally, in the order received, to
    ``twitter.Api`` and the resulting client object is returned.
    """
    credentials = (
        consumer_key,
        consumer_secret,
        access_token,
        access_token_secret,
    )
    return twitter.Api(*credentials)
# Placeholder OAuth credentials; replace them with real values before running.
consumer_key = 'hoge'
consumer_secret = 'hoge'
access_token = 'hoge'
access_token_secret = 'hoge'
api = myApi(consumer_key, consumer_secret, access_token, access_token_secret)

# Account whose timeline is downloaded, and the file the cleaned text goes to.
screen_name = 'screen_name'
filename = 'hoge.txt'

# Request up to 200 statuses from the user's timeline.
statuses = api.GetUserTimeline(screen_name, count=200)

# Text fragments stripped from every tweet before it is stored:
#   * newlines,
#   * @mentions (plus one trailing space, if any),
#   * #hashtags (up to the next whitespace),
#   * http/https URLs.
# The mention and hashtag alternatives are deliberately bounded: the previous
# greedy forms ('@.+ ' and '#.+') also removed the ordinary text that followed
# the first mention or hashtag in a tweet.
noise_pattern = re.compile(
    r"\n"
    r"|@[A-Za-z0-9_]+ ?"
    r"|#\S+"
    r"|https?://[A-Za-z0-9'~+\-=_.,/%\?!;:@#\*&\(\)]+"
)

# Concatenate all cleaned tweets into a single string with no separator.
tweets = u''.join(noise_pattern.sub(u'', status.text) for status in statuses)

# The payload is already UTF-8 encoded bytes, so write it in binary mode; the
# ``with`` block closes the file even if the write fails.
with open(filename, 'wb') as idfile:
    idfile.write(tweets.encode('utf-8'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/lib/python | |
#vim:fileencoding=utf-8 | |
import random | |
import urllib | |
import urllib2 | |
from BeautifulSoup import BeautifulSoup | |
import twitter | |
class markovTweet(object):
    """Post a sentence generated from saved tweets by a second-order Markov chain.

    The saved tweet text is read from ``filename``, sent to the Yahoo! Japan
    ``MAService`` endpoint to be split into (surface, part-of-speech) words,
    turned into a Markov chain keyed on consecutive word pairs, and a randomly
    generated sentence is posted through the Twitter API client.
    """

    # Parts of speech that must not open a generated sentence
    # (suffix, verb, particle, auxiliary verb, special).
    _BAD_START_POS = (u'接尾辞', u'動詞', u'助詞', u'助動詞', u'特殊')
    # How many random start pairs are tried before giving up.
    _MAX_START_TRIES = 50
    # Upper bound for the random number of words appended after the start pair.
    _MAX_EXTRA_WORDS = 70
    # Returned when no usable start pair exists
    # ("error: no suitable part of speech for the start of the sentence").
    _NO_START_MESSAGE = u"error:文頭に適切な品詞なし"

    def __init__(self, appid, consumer_key, consumer_secret, access_token, access_token_secret, filename):
        """Store the Yahoo! application id and file name, and build the Twitter client.

        appid -- application id sent to the Yahoo! parse service.
        consumer_key, consumer_secret, access_token, access_token_secret --
            OAuth credentials forwarded positionally to ``twitter.Api``.
        filename -- path of the file holding the previously saved tweet text.
        """
        self.appid = appid
        self.api = twitter.Api(consumer_key, consumer_secret, access_token, access_token_secret)
        self.filename = filename

    def getTweets(self):
        """Return the entire content of ``self.filename``."""
        # ``with`` guarantees the handle is closed even if read() raises.
        with open(self.filename, 'r') as tweets_file:
            return tweets_file.read()

    def yahooParse(self, tweets):
        """Send ``tweets`` to the Yahoo! Japan parse endpoint and return the open response.

        The request carries the application id, ``results='ma'``, a filter
        listing the values 1 through 13, and the text as ``sentence``.
        Because a data argument is supplied, ``urllib2.urlopen`` issues a POST.
        The caller is responsible for reading and closing the returned object.
        """
        pageurl = 'http://jlp.yahooapis.jp/MAService/V1/parse'
        results = 'ma'
        my_filter = '1|2|3|4|5|6|7|8|9|10|11|12|13'
        params = urllib.urlencode({'appid': self.appid, 'results': results, 'filter': my_filter, 'sentence': tweets})
        html = urllib2.urlopen(pageurl, params)
        return html

    def makeMarkov(self, wordlist):
        """Build a second-order Markov chain from ``wordlist`` and generate a sentence.

        wordlist -- iterable of (surface, part_of_speech) pairs in text order.

        Every pair of consecutive words maps to the list of words observed
        right after it. A random pair whose first word does not have one of
        the excluded parts of speech starts the sentence; between 1 and 70
        further words are then appended by walking the chain, stopping early
        if the current pair has no recorded successor.

        Returns the generated unicode sentence, or the fixed error message
        when the chain is empty or no acceptable start pair is found within
        50 random attempts.
        """
        markov = {}
        w1 = (u'', u'')
        w2 = (u'', u'')
        for word in wordlist:
            # Normalise to a hashable 2-tuple so keys and successors agree.
            word = (word[0], word[1])
            # Skip transitions until two real (non-empty) words have been seen.
            if w1[0] and w2[0]:
                markov.setdefault((w1, w2), []).append(word)
            w1, w2 = w2, word

        if not markov:
            # Fewer than three usable words: nothing to sample from.
            return self._NO_START_MESSAGE

        # Materialise the keys once; random.choice needs an indexable sequence.
        start_pairs = list(markov)
        for _ in range(self._MAX_START_TRIES):
            w1, w2 = random.choice(start_pairs)
            if w1[1] in self._BAD_START_POS:
                continue
            pieces = [w1[0], w2[0]]
            for _ in range(random.randint(1, self._MAX_EXTRA_WORDS)):
                followers = markov.get((w1, w2))
                if not followers:
                    # Dead end (e.g. the last pair of the corpus): stop here
                    # instead of raising KeyError.
                    break
                nxt = random.choice(followers)
                pieces.append(nxt[0])
                w1, w2 = w2, nxt
            return u''.join(pieces)
        return self._NO_START_MESSAGE

    def main(self):
        """Read the saved tweets, analyse them, generate a sentence and post it."""
        tweets = self.getTweets()
        html = self.yahooParse(tweets)
        try:
            xml = html.read()
        finally:
            # Release the HTTP connection whether or not the read succeeded.
            html.close()
        soup = BeautifulSoup(xml)
        wordlist = [(w.surface.string, w.pos.string) for w in soup.ma_result.word_list]
        sentence = self.makeMarkov(wordlist)
        self.api.PostUpdates(sentence.encode('utf-8'))
if __name__ == '__main__':
    # Placeholder settings: fill in the Yahoo! Japan application id, the four
    # Twitter OAuth credentials and the path of the saved tweet text.
    settings = {
        'appid': 'Yahoo!Japan アプリケーションID',
        'consumer_key': 'hoge',
        'consumer_secret': 'hoge',
        'access_token': 'hoge',
        'access_token_secret': 'hoge',
        'filename': 'hoge.txt',
    }
    # Build the generator and run the read -> analyse -> generate -> post pipeline.
    markovTweet(**settings).main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment