Created
January 17, 2011 07:51
-
-
Save vim13/782597 to your computer and use it in GitHub Desktop.
tweetの'(番組タイトル)録画'に反応してiepgを検索・解析、friio録画用のcron設定
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!~/lib/python | |
#vim:fileencoding=utf-8 | |
import time | |
import datetime | |
import re | |
import urllib | |
import urllib2 | |
import twitter | |
from BeautifulSoup import BeautifulSoup | |
import htmlentitydefs | |
crontab = '' # accumulated cron entries, later written into the crontab file
key = [] # search keywords extracted from matching tweets
tv = '' # title of the programme scheduled for recording
log = 'log:' # status message posted back to Twitter
# HTTP GET helper
def httpget(address, user_agent='myagent'):
    """Fetch *address* via HTTP GET with a custom User-Agent header.

    Returns the raw response body as a byte string.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', user_agent)]
    # FIX: the original never closed the response object, leaking the
    # underlying socket; close it once the body has been read.
    response = opener.open(address)
    try:
        return response.read()
    finally:
        response.close()
# Decoder for HTML references, used when building search URLs
def htmlentity2unicode(text):
    """Replace HTML entity references (&amp;) and numeric character
    references (&#65; / &#x41;) in *text* with the characters they denote.

    References that are neither a known named entity nor a valid numeric
    reference are dropped, matching the original behaviour.
    """
    # One pattern matches all three reference forms; IGNORECASE also
    # covers variants such as &#X41; and &AMP;.
    reference_regex = re.compile(u'&(#x?[0-9a-f]+|[a-z]+);', re.IGNORECASE)
    # BUG FIX: the hex test originally used '#x\d+', which rejects the
    # hex digits a-f, so references like &#xA9; were silently dropped.
    num16_regex = re.compile(u'#x[0-9a-f]+', re.IGNORECASE)
    num10_regex = re.compile(u'#\\d+', re.IGNORECASE)
    result = u''
    i = 0
    while True:
        # Locate the next entity or character reference.
        match = reference_regex.search(text, i)
        if match is None:
            result += text[i:]
            break
        result += text[i:match.start()]
        i = match.end()
        name = match.group(1)
        if name in htmlentitydefs.name2codepoint:
            # Named entity reference, e.g. &amp;
            result += unichr(htmlentitydefs.name2codepoint[name])
        elif num16_regex.match(name):
            # Hexadecimal character reference, e.g. &#x41;
            result += unichr(int(u'0' + name[1:], 16))
        elif num10_regex.match(name):
            # Decimal character reference, e.g. &#65;
            result += unichr(int(name[1:]))
    return result
def channel(c):
    """Return the terrestrial channel number for a so-net iEPG station code."""
    station_to_channel = {
        'DFS00400': '27',
        'DFS00408': '26',
        'DFS00410': '25',
        'DFS00418': '22',
        'DFS00420': '21',
        'DFS00428': '24',
        'DFS00430': '23',
        'DFS05C38': '20',
        'DFS00440': '28',
    }
    return station_to_channel[c]
def getIepg(h):
    """Take the first programme from a so-net search-result page, fetch its
    iEPG record and append a recfriio recording entry to the crontab text.

    h -- RSS/HTML text containing an '<item rdf:about=...>' schedule link.
    Side effects: appends to the global ``crontab`` and sets the global
    ``tv`` to the programme title.
    """
    global crontab
    global tv
    # Programme id from the first search hit.
    uri = re.search('<item rdf:about="http://tv.so-net.ne.jp/schedule/(.*?)\\.', h).group(1)
    iepg = httpget('http://tv.so-net.ne.jp/iepg.tvpid?id=' + uri)
    # iEPG records are Shift_JIS encoded.
    elements = iepg.decode('shift_jis')
    title = re.search('title: (.*?)\n', elements).group(1)
    sub = re.search('subtitle: (.*?)\n', elements)
    month = re.search('month: (.*?)\n', elements).group(1)
    date = re.search('date: (.*?)\n', elements).group(1)
    start_h = re.search('start: (.*?):', elements).group(1)
    start_m = re.search('start: ..:(.*?)\n', elements).group(1)
    end_h = re.search('end: (.*?):', elements).group(1)
    end_m = re.search('end: ..:(.*?)\n', elements).group(1)
    station = re.search('station: (.*?)\n', elements).group(1)
    subtitle = ''
    if sub is not None:
        subtitle = sub.group(1)
    # Recording length: end - start on an arbitrary date, rendered "H:MM:SS".
    hours = str(datetime.datetime(2009, 1, 1, int(end_h), int(end_m))
                - datetime.datetime(2009, 1, 1, int(start_h), int(start_m)))
    # NOTE(review): despite their names, both of these hold SECOND counts
    # (hours*3600 and minutes*60); kept to preserve the original behaviour.
    minutes = int(re.search('(.*?):', hours).group(1)) * 3600
    seconds = int(re.search('.*?:(.*?):', hours).group(1)) * 60
    # iEPG field values end with a carriage return, hence the [:-1] slices.
    tv = title[:-1]
    # FIX: this statement was line-wrapped mid-string in the pasted source
    # (a syntax error); the literal is rejoined into one expression here.
    cron = (start_m[:-1] + ' ' + start_h + ' ' + date[:-1] + ' ' + month[:-1]
            + ' * /Applications/recfriio-018/recfriio -c ' + channel(station[:-1])
            + ' -s ' + str(minutes + seconds - 1)
            + ' -f /Volumes/HitachiNov09n1/friio/' + title[:-1] + subtitle[:-1] + '.ts\n')
    crontab += cron
def search(keyword):
    """Search the so-net TV guide for *keyword* and schedule a recording.

    keyword -- an already URL-quoted search term.
    Side effects: on a hit, delegates to getIepg() (which updates the
    global ``crontab``/``tv``); always appends to the global ``log``
    (the programme title, or '1' when nothing matched).
    """
    global tv
    global log
    # FIX: the URL literal was line-wrapped in the pasted source (a syntax
    # error); rejoined into a single string.
    rss = httpget('http://tv.so-net.ne.jp/rss/schedulesBySearch.action?stationPlatformId=1&condition.keyword=' + keyword)
    # A result page with no <rdf:li> items splits into exactly one piece.
    if len(re.split('<rdf:li', rss)) == 1:
        log += '1' + '/'
    else:
        getIepg(rss)
        log += tv + '/'
# --- main script ----------------------------------------------------------
# Read the id of the last tweet processed so only newer tweets are scanned.
# (Renamed from `file`, which shadowed the builtin; closed via `with`.)
with open('/hoge/recfriio_status_id.txt', 'r') as status_file:
    status_id = status_file.readline()
xml = httpget('http://twitter.com/statuses/user_timeline/n000dle.xml?since_id=' + status_id)
soup = BeautifulSoup(xml)
tweets = soup.findAll('text')
ids = soup.findAll('id')  # renamed from `id`, which shadowed the builtin
if ids:
    # Remember the newest status id for the next run.
    status_id = re.search('<id>(.*?)</id>', str(ids[0])).group(1)
for s in tweets:
    # A tweet of the form '<title>録画' ("record <title>") requests a recording.
    match = re.search(u'<text>(.*?)録画', s.encode('utf-8'))
    if match is not None:
        key.append(match.group(1))
if key:
    for k in key:
        g = htmlentity2unicode(k)
        search(urllib.quote(g.encode('utf-8', 'replace').replace('?', ' ')))
    # FIX: the original call was syntactically broken
    # (twitter.Api('consumer_key='consumer_key', ...)); pass the
    # credentials as proper keyword arguments instead.
    api = twitter.Api(consumer_key='consumer_key',
                      consumer_secret='consumer_secret',
                      access_token_key='access_token',
                      access_token_secret='access_token_secret')
    status = api.PostUpdate(log.encode('utf-8'))
# Keep only the existing recording entries (the lines mentioning .ts)
# from the current cron file, then rewrite it around them.
q = ''
with open('/hoge/.cronpython', 'r') as old_cron:  # original leaked this handle
    for line in old_cron:
        if re.search(r'\.ts', line):
            q += line
with open('/hoge/.cronpython', 'w') as cron_file:
    cron_file.write('*/10 * * * * python /hoge/recfriio.py\n' + q
                    + crontab.encode('utf-8')
                    + '* * * * * crontab /hoge/.cronpython\n')
# Persist the newest processed status id for the next run.
with open('/hoge/recfriio_status_id.txt', 'w') as status_file:
    status_file.write(status_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment