Skip to content

Instantly share code, notes, and snippets.

@ikegami-yukino
Created October 1, 2012 07:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikegami-yukino/3810017 to your computer and use it in GitHub Desktop.
Save ikegami-yukino/3810017 to your computer and use it in GitHub Desktop.
複数サイトから注目キーワードを取得する
# -*- coding: utf-8 -*-
import urllib, re, os
from BeautifulSoup import BeautifulSoup
# Feed URLs that main() passes to get_rss_keywords(), one at a time.
urls = (
    'http://search.biglobe.ne.jp/rss/ranking.xml',
    'http://trackword.rssfeed.cc/index.xml',
    'http://www.jtb.co.jp/ranking/keyword/rss.aspx',
    'http://www.nilab.info/buzztube/buzztube.xml',
    'http://ranking.goo.ne.jp/rss/keyword/keyrank_all1/index.rdf',
    'http://searchranking.yahoo.co.jp/rss/burst_ranking-rss.xml',
    'http://d.hatena.ne.jp/hotkeyword?mode=rss',
    'http://www.google.co.jp/trends/hottrends/atom/feed?pn=p4',
)
# Page that main() downloads as raw text and scans with naver_topics.
naver_url = 'http://topicwords.naver.jp/ranking'
# Absolute directory of this script, with a trailing slash.
wdir = os.path.abspath(os.path.dirname(__file__)) + '/'
# Output file that main() appends the collected keywords to.
filename = wdir + 'trend.txt'
# Entity-conversion mode handed to BeautifulSoup by openURL().
html_entities = BeautifulSoup.HTML_ENTITIES
# Captures the text between 「 and 」 (greedy: first 「 to last 」 on a line).
kagikakko = re.compile(u'「(.*)」')
# Matches a 【...】 span (greedy); get_rss_keywords() strips it from titles.
sumikakko = re.compile(u'【.*】')
# Captures what follows "topics?q=" up to the next '&'. Raw string so that
# the regex escape "\?" is not treated as an (invalid) string escape.
naver_topics = re.compile(r'topics\?q=([^&]+)')
# Accumulator shared by get_rss_keywords() and main().
got_keywords = []
# Open a URL (pass bs=False to skip Beautiful Soup and get the raw body)
def openURL(url, bs=True):
    """Fetch *url* and return its body, parsed with Beautiful Soup by default.

    Args:
        url: Address to fetch with ``urllib.urlopen``.
        bs: Only the literal ``True`` (the default) enables parsing; any
            other value returns the raw response body.

    Returns:
        A ``BeautifulSoup`` object built with
        ``convertEntities=html_entities`` when ``bs is True``, otherwise the
        raw data read from the response.
    """
    response = urllib.urlopen(url)
    try:
        data = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    if bs is True:
        return BeautifulSoup(data, convertEntities=html_entities)
    return data
def _keywords_from_title(text):
    """Return the keywords contained in one feed item title.

    Any 【...】 span is removed first. If the remainder contains a 「...」
    span, the captured text (all matches joined together) is the single
    keyword; otherwise the remainder is split on ASCII spaces. Empty strings
    are dropped so that blank keywords are never recorded.
    """
    stripped = sumikakko.sub('', text)
    quoted = kagikakko.findall(stripped)
    if quoted:
        candidates = [''.join(quoted)]
    else:
        # A title without spaces yields a one-element list, i.e. itself.
        candidates = stripped.split(' ')
    return [candidate for candidate in candidates if candidate]


# Extract keywords from an RSS feed
def get_rss_keywords(url):
    """Fetch the feed at *url* and append its title keywords to got_keywords.

    The document is loaded through ``openURL`` and every ``title`` element
    found inside every ``item`` element is converted to keywords, which are
    appended to the module-level ``got_keywords`` list.

    Args:
        url: Address of the feed to fetch.

    Returns:
        None. Results are accumulated in ``got_keywords``.
    """
    feed = openURL(url)
    for item in feed.findAll('item'):
        for title in item.findAll('title'):
            # Beautiful Soup gives None for .string when the tag has no
            # single text child; there is nothing to extract in that case.
            if title.string is None:
                continue
            got_keywords.extend(_keywords_from_title(title.string))
def main():
    """Collect keywords from every source and append them to the trend file.

    Runs ``get_rss_keywords`` for each entry in ``urls``, then scans the raw
    text of ``naver_url`` with ``naver_topics``, de-duplicates the result into
    the module-level ``got_keywords`` list, and appends one keyword per line
    to ``filename``.
    """
    global got_keywords
    # Extract keywords from the RSS feeds
    for url in urls:
        get_rss_keywords(url)
    # Extract from the NAVER topic-word ranking
    data = openURL(naver_url, bs=False)
    for query in naver_topics.findall(data):
        # A captured query is split on '+' and each part stored separately.
        for keyword in urllib.unquote(query).split('+'):
            got_keywords.append(keyword)
    got_keywords = list(set(got_keywords))
    # Write to the file.
    # Feed keywords are unicode while the NAVER ones are byte strings; joining
    # or writing them as-is relies on the implicit ASCII codec and fails on
    # non-ASCII text, so unicode values are encoded explicitly as UTF-8.
    # Byte strings are written unchanged.
    lines = set()
    for keyword in got_keywords:
        if isinstance(keyword, unicode):
            keyword = keyword.encode('utf-8')
        lines.add(keyword)
    with open(filename, 'a') as outfile:
        # Terminate every keyword with a newline: the file is opened in
        # append mode, so an unterminated last line would be glued to the
        # first keyword of the next run.
        outfile.write(''.join(line + '\n' for line in lines))
# Run the collection only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment