Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Output the release calendar of goods relating to games
# -*- coding: utf-8 -*-
import requests
import urllib
import os.path
import sys
import codecs
import htmlentitydefs
import re
from bs4 import BeautifulSoup
def print_out(text, output):
    """Emit one line of output according to the selected output mode.

    Args:
        text: The line to emit, without a trailing newline.
        output: ``'File'`` to get the line back (newline-terminated) so the
            caller can collect it, or ``'Console'`` to print it immediately.

    Returns:
        ``text`` followed by a newline when ``output`` is ``'File'``;
        otherwise an empty string, so the result can always be concatenated
        onto the text collected so far.
    """
    if output == 'File':
        return text + '\n'
    if output == 'Console':
        # Parenthesised single-argument form: Python 2 parses it as the print
        # statement and Python 3 as the print function, with the same output.
        print(text)
    return ''
# Matches one HTML reference: hexadecimal (&#x3042;), decimal (&#12354;) or
# named (&amp;).  Names may contain digits after the first letter (&frac12;).
_REFERENCE_REGEX = re.compile(
    u'&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);', re.IGNORECASE)


# Convert entity references and character references back to plain characters.
def htmlentity2unicode(text):
    """Replace HTML entity and numeric character references in ``text``.

    Named references are looked up in ``htmlentitydefs.name2codepoint``;
    ``&#NNN;`` is read as decimal and ``&#xHHH;`` as hexadecimal.  Anything
    that cannot be decoded (an unknown name, or a code point the interpreter
    cannot represent) is left in the output exactly as written instead of
    being dropped.

    Args:
        text: The unicode string to decode.

    Returns:
        ``text`` with every decodable reference replaced by its character.
    """
    def _decode(match):
        name = match.group(1)
        if name.startswith(u'#'):
            # Numeric reference: "#x..." / "#X..." is hex, otherwise decimal.
            if name[1] in u'xX':
                digits, base = name[2:], 16
            else:
                digits, base = name[1:], 10
            try:
                return unichr(int(digits, base))
            except (ValueError, OverflowError):
                # Code point outside the range unichr() accepts.
                return match.group(0)
        codepoint = htmlentitydefs.name2codepoint.get(name)
        if codepoint is None:
            # Not a known entity name (lookup is case-sensitive): keep as is.
            return match.group(0)
        return unichr(codepoint)

    return _REFERENCE_REGEX.sub(_decode, text)
# Command line:
#   (no arguments)           no year/month is sent; heading says "this month"
#   YEAR/MONTH               that month, printed to the console
#   YEAR/MONTH OUTPUT_FILE   that month, written to OUTPUT_FILE as UTF-8
argvs = sys.argv
output = 'Console'
params = ''
out_parts = []
if len(argvs) == 3:
    output = 'File'
else:
    # Wrap stdout so characters the console cannot encode are replaced
    # instead of raising.  sys.stdout.encoding is None when stdout is not a
    # terminal (e.g. piped), so fall back to UTF-8 in that case.
    sys.stdout = codecs.getwriter(sys.stdout.encoding or 'utf-8')(
        sys.stdout, errors='replace')

if len(argvs) <= 1:
    out_parts.append(print_out(u'今月発売のゲームカレンダー', output))
else:
    ym = argvs[1].split('/')
    if len(ym) < 2:
        sys.exit('usage: %s [YEAR/MONTH [OUTPUT_FILE]]' % argvs[0])
    params = urllib.urlencode({
        'year': ym[0],
        'month': ym[1],
    })
    # NOTE(review): the empty u'' between year and month looks like a lost
    # separator/suffix -- confirm the intended heading text.
    out_parts.append(
        print_out(ym[0] + u'' + ym[1] + u'月発売のゲームカレンダー', output))

# A timeout keeps the script from hanging forever on an unresponsive server.
req = requests.get('http://calendar.gameiroiro.com/game.php?' + params,
                   timeout=30)
# Hand the raw bytes to BeautifulSoup rather than re-encoding req.text:
# req.encoding can be None, which made the re-encode step raise.
soup = BeautifulSoup(req.content)
days = soup.find_all("tr")

for counter, day in enumerate(days):
    # The first <tr> gets no day heading (presumably the table's header row
    # -- confirm); after that the row index is used as the day number.
    if counter != 0:
        # NOTE(review): the trailing unicode(u'') is empty; it looks like a
        # lost day suffix -- confirm.
        out_parts.append(
            print_out('\n' + unicode(counter) + unicode(u''), output))
    softs = day.select("td div div div.product-description-right")
    for s in softs:
        genre = s.select("p.p-genre span")[0].text
        name = htmlentity2unicode(s.a.text.strip())
        company = s.select("p.p-company span")[0].text
        out_parts.append(
            print_out(name + '(' + company + ')[' + genre + ']', output))

# In console mode every piece is '' (the lines were already printed).
out_txt = ''.join(out_parts)

if output == 'File':
    # The with-block closes the file even if the write fails.
    with codecs.open(argvs[2], 'w', 'utf-8') as f:
        f.write(out_txt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment