Created
January 17, 2011 07:51
-
-
Save vim13/782597 to your computer and use it in GitHub Desktop.
tweetの'(番組タイトル)録画'に反応してiepgを検索・解析、friio録画用のcron設定
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!~/lib/python | |
#vim:fileencoding=utf-8 | |
import time | |
import datetime | |
import re | |
import urllib | |
import urllib2 | |
import twitter | |
from BeautifulSoup import BeautifulSoup | |
import htmlentitydefs | |
crontab = '' # accumulated cron entries, later written into the crontab file
key = [] # search keywords extracted from matching tweets
tv = '' # title of the programme scheduled for recording
log = 'log:' # status message posted back to Twitter
# HTTP GET helper
def httpget(address, user_agent='myagent'):
    """Fetch *address* via HTTP GET with a custom User-Agent header.

    Returns the raw response body as a byte string.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', user_agent)]
    # FIX: the original never closed the response object, leaking the
    # underlying socket; close it once the body has been read.
    response = opener.open(address)
    try:
        return response.read()
    finally:
        response.close()
# Decoder for HTML references, used when building search URLs
def htmlentity2unicode(text):
    """Replace HTML entity references (&amp;) and numeric character
    references (&#65; / &#x41;) in *text* with the characters they denote.

    References that are neither a known named entity nor a valid numeric
    reference are dropped, matching the original behaviour.
    """
    # One pattern matches all three reference forms; IGNORECASE also
    # covers variants such as &#X41; and &AMP;.
    reference_regex = re.compile(u'&(#x?[0-9a-f]+|[a-z]+);', re.IGNORECASE)
    # BUG FIX: the hex test originally used '#x\d+', which rejects the
    # hex digits a-f, so references like &#xA9; were silently dropped.
    num16_regex = re.compile(u'#x[0-9a-f]+', re.IGNORECASE)
    num10_regex = re.compile(u'#\\d+', re.IGNORECASE)
    result = u''
    i = 0
    while True:
        # Locate the next entity or character reference.
        match = reference_regex.search(text, i)
        if match is None:
            result += text[i:]
            break
        result += text[i:match.start()]
        i = match.end()
        name = match.group(1)
        if name in htmlentitydefs.name2codepoint:
            # Named entity reference, e.g. &amp;
            result += unichr(htmlentitydefs.name2codepoint[name])
        elif num16_regex.match(name):
            # Hexadecimal character reference, e.g. &#x41;
            result += unichr(int(u'0' + name[1:], 16))
        elif num10_regex.match(name):
            # Decimal character reference, e.g. &#65;
            result += unichr(int(name[1:]))
    return result
def channel(c):
    """Return the terrestrial channel number for a so-net iEPG station code."""
    station_to_channel = {
        'DFS00400': '27',
        'DFS00408': '26',
        'DFS00410': '25',
        'DFS00418': '22',
        'DFS00420': '21',
        'DFS00428': '24',
        'DFS00430': '23',
        'DFS05C38': '20',
        'DFS00440': '28',
    }
    return station_to_channel[c]
def getIepg(h):
    """Take the first programme from a so-net search-result page, fetch its
    iEPG record and append a recfriio recording entry to the crontab text.

    h -- RSS/HTML text containing an '<item rdf:about=...>' schedule link.
    Side effects: appends to the global ``crontab`` and sets the global
    ``tv`` to the programme title.
    """
    global crontab
    global tv
    # Programme id from the first search hit.
    uri = re.search('<item rdf:about="http://tv.so-net.ne.jp/schedule/(.*?)\\.', h).group(1)
    iepg = httpget('http://tv.so-net.ne.jp/iepg.tvpid?id=' + uri)
    # iEPG records are Shift_JIS encoded.
    elements = iepg.decode('shift_jis')
    title = re.search('title: (.*?)\n', elements).group(1)
    sub = re.search('subtitle: (.*?)\n', elements)
    month = re.search('month: (.*?)\n', elements).group(1)
    date = re.search('date: (.*?)\n', elements).group(1)
    start_h = re.search('start: (.*?):', elements).group(1)
    start_m = re.search('start: ..:(.*?)\n', elements).group(1)
    end_h = re.search('end: (.*?):', elements).group(1)
    end_m = re.search('end: ..:(.*?)\n', elements).group(1)
    station = re.search('station: (.*?)\n', elements).group(1)
    subtitle = ''
    if sub is not None:
        subtitle = sub.group(1)
    # Recording length: end - start on an arbitrary date, rendered "H:MM:SS".
    hours = str(datetime.datetime(2009, 1, 1, int(end_h), int(end_m))
                - datetime.datetime(2009, 1, 1, int(start_h), int(start_m)))
    # NOTE(review): despite their names, both of these hold SECOND counts
    # (hours*3600 and minutes*60); kept to preserve the original behaviour.
    minutes = int(re.search('(.*?):', hours).group(1)) * 3600
    seconds = int(re.search('.*?:(.*?):', hours).group(1)) * 60
    # iEPG field values end with a carriage return, hence the [:-1] slices.
    tv = title[:-1]
    # FIX: this statement was line-wrapped mid-string in the pasted source
    # (a syntax error); the literal is rejoined into one expression here.
    cron = (start_m[:-1] + ' ' + start_h + ' ' + date[:-1] + ' ' + month[:-1]
            + ' * /Applications/recfriio-018/recfriio -c ' + channel(station[:-1])
            + ' -s ' + str(minutes + seconds - 1)
            + ' -f /Volumes/HitachiNov09n1/friio/' + title[:-1] + subtitle[:-1] + '.ts\n')
    crontab += cron
def search(keyword):
    """Search the so-net TV guide for *keyword* and schedule a recording.

    keyword -- an already URL-quoted search term.
    Side effects: on a hit, delegates to getIepg() (which updates the
    global ``crontab``/``tv``); always appends to the global ``log``
    (the programme title, or '1' when nothing matched).
    """
    global tv
    global log
    # FIX: the URL literal was line-wrapped in the pasted source (a syntax
    # error); rejoined into a single string.
    rss = httpget('http://tv.so-net.ne.jp/rss/schedulesBySearch.action?stationPlatformId=1&condition.keyword=' + keyword)
    # A result page with no <rdf:li> items splits into exactly one piece.
    if len(re.split('<rdf:li', rss)) == 1:
        log += '1' + '/'
    else:
        getIepg(rss)
        log += tv + '/'
# --- main script ----------------------------------------------------------
# Read the id of the last tweet processed so only newer tweets are scanned.
# (Renamed from `file`, which shadowed the builtin; closed via `with`.)
with open('/hoge/recfriio_status_id.txt', 'r') as status_file:
    status_id = status_file.readline()
xml = httpget('http://twitter.com/statuses/user_timeline/n000dle.xml?since_id=' + status_id)
soup = BeautifulSoup(xml)
tweets = soup.findAll('text')
ids = soup.findAll('id')  # renamed from `id`, which shadowed the builtin
if ids:
    # Remember the newest status id for the next run.
    status_id = re.search('<id>(.*?)</id>', str(ids[0])).group(1)
for s in tweets:
    # A tweet of the form '<title>録画' ("record <title>") requests a recording.
    match = re.search(u'<text>(.*?)録画', s.encode('utf-8'))
    if match is not None:
        key.append(match.group(1))
if key:
    for k in key:
        g = htmlentity2unicode(k)
        search(urllib.quote(g.encode('utf-8', 'replace').replace('?', ' ')))
    # FIX: the original call was syntactically broken
    # (twitter.Api('consumer_key='consumer_key', ...)); pass the
    # credentials as proper keyword arguments instead.
    api = twitter.Api(consumer_key='consumer_key',
                      consumer_secret='consumer_secret',
                      access_token_key='access_token',
                      access_token_secret='access_token_secret')
    status = api.PostUpdate(log.encode('utf-8'))
# Keep only the existing recording entries (the lines mentioning .ts)
# from the current cron file, then rewrite it around them.
q = ''
with open('/hoge/.cronpython', 'r') as old_cron:  # original leaked this handle
    for line in old_cron:
        if re.search(r'\.ts', line):
            q += line
with open('/hoge/.cronpython', 'w') as cron_file:
    cron_file.write('*/10 * * * * python /hoge/recfriio.py\n' + q
                    + crontab.encode('utf-8')
                    + '* * * * * crontab /hoge/.cronpython\n')
# Persist the newest processed status id for the next run.
with open('/hoge/recfriio_status_id.txt', 'w') as status_file:
    status_file.write(status_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment