vim13/moyashi.py

## moyashi.py
#!/usr/lib/python
#vim:fileencoding=utf-8

import re
import random
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup, NavigableString, Declaration, Comment

class markovTweet:
    """Generate a sentence from a scraped question page with a word-pair chain.

    The pipeline (see ``main``) is: download the text of one question from
    the listing page, send it to the MAService ``parse`` endpoint to split it
    into words, then recombine the words with a second-order Markov chain.
    """

    # Listing page that is scanned for the first ``question_detail/q<N>`` link.
    LIST_URL = 'http://list.chiebukuro.yahoo.co.jp/dir/list.php?dnum=2078297371&flg=3&agree=1'
    # Prefix to which the matched ``question_detail/q<N>`` path is appended.
    DETAIL_URL_PREFIX = 'http://ksrd.yahoo.co.jp/PAGE=d2078297371/LOC=QUELIST/R=1/O=TITLE/ST=all/*-http://detail.chiebukuro.yahoo.co.jp/qa/'
    # Endpoint that receives the text to be split into words.
    PARSE_URL = 'http://jlp.yahooapis.jp/MAService/V1/parse'

    def __init__(self, api, appid):
        """Remember the caller-supplied handles.

        api   -- stored as ``self.api``; not used by any method of this class.
        appid -- sent as the ``appid`` request parameter by ``yahooParse``.
        """
        self.appid = appid
        self.api = api

    def getNavigableStrings(self, soup):
        """Yield every non-blank text node below ``soup``, depth first.

        Comments and declarations are skipped, and the contents of
        ``script`` and ``style`` elements are not descended into.
        """
        if isinstance(soup, NavigableString):
            if type(soup) not in (Comment, Declaration) and soup.strip():
                yield soup
        elif soup.name not in ('script', 'style'):
            for child in soup.contents:
                for text_node in self.getNavigableStrings(child):
                    yield text_node

    def getTweets(self):
        """Download one question page and return its text as UTF-8 bytes.

        The first ``question_detail/q<N>`` link on the listing page is
        followed, the text inside ``<div class="qa">`` is concatenated, and
        ``<id>さん`` mentions and newlines are removed.

        Raises ValueError when the listing page contains no question link or
        the question page has no ``qa`` block (previously these cases failed
        with an AttributeError on ``None``).
        """
        listing = urllib2.urlopen(self.LIST_URL)
        try:
            listing_html = listing.read()
        finally:
            # Release the connection even if the read fails.
            listing.close()

        match = re.search(r'question_detail/q[0-9]+', listing_html)
        if match is None:
            raise ValueError('no question link found on the listing page')

        detail = urllib2.urlopen(self.DETAIL_URL_PREFIX + match.group(0))
        try:
            qa_block = BeautifulSoup(detail).find("div", {"class": "qa"})
        finally:
            detail.close()
        if qa_block is None:
            raise ValueError('question page has no <div class="qa"> block')

        text = "".join(self.getNavigableStrings(qa_block)).encode('utf-8')
        return re.sub('([_a-z0-9]+)さん|\n', '', text).strip()

    def yahooParse(self, tweets):
        """POST ``tweets`` to the parse endpoint and return the open response.

        The returned object is the file-like handle from ``urllib2.urlopen``;
        the caller is responsible for reading it.
        """
        params = urllib.urlencode({
            'appid': self.appid,
            'results': 'ma',
            'filter': '1|2|3|4|5|6|7|8|9|10|11|12|13',
            'sentence': tweets,
        })
        return urllib2.urlopen(self.PARSE_URL, params)

    def makeMarkov(self, wordlist):
        """Return a sentence built by walking a second-order Markov chain.

        Every pair of consecutive words maps to the list of words that
        followed that pair in ``wordlist``; the walk starts at the first two
        words and picks each next word at random among the recorded
        successors.  The end of the input is recorded as a ``None`` successor
        of the last pair, so the walk can stop there.

        Empty words are ignored.  Inputs with fewer than three words have
        nothing to recombine and are returned joined as-is.  The result never
        contains more words than the input.
        """
        words = [w for w in wordlist if w]
        if len(words) < 3:
            return "".join(words).strip()

        # Keep *all* successors of a pair; storing only the latest one would
        # make the walk deterministic and lose the earlier continuations.
        markov = {}
        for i in range(len(words) - 2):
            markov.setdefault((words[i], words[i + 1]), []).append(words[i + 2])
        # None marks "end of text" without discarding real successors that
        # the final pair may have collected earlier in the input.
        markov.setdefault((words[-2], words[-1]), []).append(None)

        w1, w2 = words[0], words[1]
        pieces = [w1, w2]
        # Bounding the number of steps guarantees termination on cycles.
        for _ in range(len(words) - 2):
            following = random.choice(markov[(w1, w2)])
            if following is None:
                break
            pieces.append(following)
            w1, w2 = w2, following
        return "".join(pieces).strip()

    def main(self):
        """Run the whole pipeline and return the generated sentence."""
        text = self.getTweets()
        response = self.yahooParse(text)
        soup = BeautifulSoup(response)
        # Select the <word> elements explicitly: iterating the tag itself
        # also yields text nodes, which have no ``surface`` child.
        words = soup.ma_result.word_list.findAll('word')
        wordlist = [w.surface.string.encode('utf-8') for w in words]
        return self.makeMarkov(wordlist)
	#!/usr/lib/python
	#vim:fileencoding=utf-8

	import re
	import random
	import urllib
	import urllib2
	from BeautifulSoup import BeautifulSoup, NavigableString, Declaration, Comment

	# NOTE(review): this tab-prefixed copy repeats the class defined earlier in
	# the file, but every nesting level is flattened to a single tab and each
	# `|` is written as `\|`, so as written it is not valid Python. It looks
	# like a rendering/extraction artifact -- TODO confirm, then remove it or
	# restore the indentation and the unescaped `|` characters.
	class markovTweet:
	# Stores the `appid` and `api` constructor arguments on the instance.
	def __init__(self, api, appid):
	self.appid = appid
	self.api = api

	# Generator over the text nodes below `soup`: skips Comment/Declaration
	# nodes and blank strings, and does not descend into script/style tags.
	def getNavigableStrings(self,soup):
	if isinstance(soup, NavigableString):
	if type(soup) not in (Comment, Declaration) and soup.strip():
	yield soup
	elif soup.name not in ('script', 'style'):
	for c in soup.contents:
	for g in self.getNavigableStrings(c):
	yield g

	# Fetches the listing page, follows the first `question_detail/q<N>` match,
	# and returns the UTF-8 encoded text of the page's <div class="qa">.
	# NOTE(review): the `\|` in the pattern below matches a literal pipe here,
	# unlike the unescaped `|` in the first copy -- presumably unintended.
	def getTweets(self):
	pageurl = 'http://list.chiebukuro.yahoo.co.jp/dir/list.php?dnum=2078297371&flg=3&agree=1'
	html = urllib2.urlopen(pageurl)
	soup = html.read()
	question = re.search('question_detail/q[0-9]+', soup).group(0)
	html = urllib2.urlopen('http://ksrd.yahoo.co.jp/PAGE=d2078297371/LOC=QUELIST/R=1/O=TITLE/ST=all/*-http://detail.chiebukuro.yahoo.co.jp/qa/' + question)
	soup = BeautifulSoup(html).find("div",{"class":"qa"})
	text = ("".join(self.getNavigableStrings(soup))).encode('utf-8')
	return re.sub('([_a-z0-9]+)さん\|\n', '', text).strip()

	# POSTs `tweets` as the `sentence` parameter to the parse endpoint and
	# returns the open response object from urllib2.urlopen.
	# NOTE(review): the filter value contains backslashes in this copy.
	def yahooParse(self,tweets):
	pageurl = 'http://jlp.yahooapis.jp/MAService/V1/parse'
	results = 'ma'
	my_filter = '1\|2\|3\|4\|5\|6\|7\|8\|9\|10\|11\|12\|13'
	params = urllib.urlencode({'appid':self.appid, 'results':results, 'filter':my_filter, 'sentence':tweets})
	html = urllib2.urlopen(pageurl, params)
	return html

	# Maps each consecutive word pair to the word that followed it (a later
	# occurrence overwrites an earlier one), marks the last pair with "", then
	# walks the table from the first two words and concatenates the result.
	def makeMarkov(self,wordlist):
	markov = {}
	w1 = ""
	w2 = ""
	for word in wordlist:
	if w1 and w2:
	markov[(w1, w2)] = word
	w1, w2 = w2, word
	markov[(w1, w2)] = ""
	wo1, wo2 = wordlist[0], wordlist[1]
	sentence = wo1 + wo2
	for n in markov:
	if not markov[(wo1,wo2)]:
	break
	else:
	tmp = markov[(wo1, wo2)]
	wo1, wo2 = wo2, tmp
	sentence += tmp
	return sentence.strip()

	# Runs getTweets -> yahooParse -> makeMarkov and returns the sentence.
	def main(self):
	text = self.getTweets()
	html = self.yahooParse(text)
	soup = BeautifulSoup(html)
	wordlist = [(w.surface.string).encode('utf-8') for w in soup.ma_result.word_list]
	sentence = self.makeMarkov(wordlist)
	return sentence