Skip to content

Instantly share code, notes, and snippets.

@mu-777
Last active February 25, 2018 11:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mu-777/519ff689be24aa66253948a43611ec5d to your computer and use it in GitHub Desktop.
Save mu-777/519ff689be24aa66253948a43611ec5d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import os
import sys
import urllib
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
def get_url(parsedurl, page):
    """Build the URL of one page of the listing addressed by *parsedurl*.

    Only the ``g`` query parameter of the original URL is carried over; any
    other query parameters are dropped and ``page`` is set to *page*.

    Args:
        parsedurl: Result of ``urllib.parse.urlparse``; its ``scheme``,
            ``netloc``, ``path`` and ``query`` attributes are read.
        page: Value placed in the ``page`` query parameter.

    Returns:
        A string of the form ``scheme://netloc/path?page=<page>&g=<g>``.

    Raises:
        KeyError: If the original query string has no non-empty ``g``
            parameter.
    """
    qrys = urllib.parse.parse_qs(parsedurl.query)
    try:
        g_value = qrys['g'][0]
    except KeyError:
        raise KeyError(
            "URL has no 'g' query parameter: {0!r}".format(parsedurl.query)
        ) from None
    # parse_qs percent-decodes the value, so it has to be re-encoded before
    # being put back into a query string; otherwise a decoded '&' or '='
    # would silently change the meaning of the URL.
    query = urllib.parse.urlencode([('page', page), ('g', g_value)])
    return '{0}://{1}{2}?{3}'.format(
        parsedurl.scheme, parsedurl.netloc, parsedurl.path, query)
def get_maxpage(soup):
    """Return the largest integer found among the links of the ``table1`` element.

    The first element with class ``table1`` is searched for ``<a>`` tags and
    the ``string`` of each one is parsed as an integer; links whose text is
    missing or not numeric are ignored.

    Args:
        soup: Parsed document exposing ``find(class_=...)``.

    Returns:
        The maximum integer link label, or ``1`` when the ``table1`` element
        is absent or contains no numeric link.
    """
    table = soup.find(class_='table1')
    if table is None:
        # No such element at all: treat the listing as a single page.
        return 1

    pages = []
    for link in table.find_all('a'):
        try:
            pages.append(int(link.string))
        except (TypeError, ValueError):
            # TypeError: link.string is None; ValueError: non-numeric label.
            continue
    return max(pages, default=1)
def get_name(soup):
    """Extract one name per element carrying the ``member`` class.

    For each such element the prettified markup is split on newlines and the
    third line is taken with its first two characters removed.
    NOTE(review): this presumably relies on the name sitting two levels deep
    in the element's markup - confirm against the real page.

    Args:
        soup: Parsed document exposing ``find_all(class_=...)``.

    Returns:
        A list of strings, one per ``member`` element, in document order.
    """
    names = []
    for member in soup.find_all(class_='member'):
        pretty_lines = member.prettify().split('\n')
        third_line = pretty_lines[2]
        names.append(third_line[2:])
    return names
def get_dates(soup):
    """Collect the ``string`` of every element carrying the ``date`` class.

    Args:
        soup: Parsed document exposing ``find_all(class_=...)``.

    Returns:
        A list with one entry per ``date`` element, in document order. An
        entry is whatever the element's ``string`` attribute holds.
    """
    labels = []
    for date_cell in soup.find_all(class_='date'):
        labels.append(date_cell.string)
    return labels
def get_answers(soup, rawidx):
    """Return the ``mark`` strings that share a parent with one ``date`` element.

    Args:
        soup: Parsed document exposing ``find_all(class_=...)``.
        rawidx: Index into the document-ordered list of ``date`` elements.

    Returns:
        The ``string`` of every ``mark`` element found under the parent of
        the selected ``date`` element, in document order.

    Raises:
        IndexError: If *rawidx* is outside the list of ``date`` elements.
    """
    date_cells = soup.find_all(class_='date')
    container = date_cells[rawidx].parent
    marks = []
    for mark_cell in container.find_all(class_='mark'):
        marks.append(mark_cell.string)
    return marks
def get_title(soup):
    """Return the ``string`` of the first element carrying the ``title`` class.

    Args:
        soup: Parsed document exposing ``find(class_=...)``.

    Returns:
        The ``string`` attribute of the matched element, unmodified.
    """
    title_node = soup.find(class_='title')
    return title_node.string
# --------------------------------------------
def _fetch_soup(page_url):
    """Download *page_url* and parse it with the lxml parser.

    The HTTP response is closed as soon as parsing has consumed it.
    """
    with urllib.request.urlopen(page_url) as response:
        return BeautifulSoup(response, "lxml")


def main(argv):
    """Scrape every page of the listing and write the answers to a CSV file.

    Args:
        argv: Command-line vector; ``argv[1]`` is the listing URL and the
            optional ``argv[2]`` is the output directory (defaults to the
            current working directory).

    The CSV has a ``name`` column followed by one column per date; each row
    is also echoed to standard output.
    """
    if len(argv) < 2:
        sys.exit('usage: {0} URL [OUTDIR]'.format(argv[0] if argv else 'script'))
    url = argv[1]
    outdir = argv[2] if len(argv) > 2 else os.path.abspath(os.curdir)

    parsedurl = urllib.parse.urlparse(url)
    first_page = _fetch_soup(get_url(parsedurl, 0))
    title = get_title(first_page).strip()
    maxpage = get_maxpage(first_page)
    dates = get_dates(first_page)

    names = []
    # One answer list per date column, addressed by position rather than by
    # label so that two columns with the same label stay separate.
    columns = [[] for _ in dates]
    for pageid in range(maxpage):
        # Page 0 is already in memory; do not download it a second time.
        soup = first_page if pageid == 0 else _fetch_soup(get_url(parsedurl, pageid))
        names += get_name(soup)
        for i, column in enumerate(columns):
            column += get_answers(soup, i)

    rows = [['name'] + dates]
    rows += [[name] + [column[i] for column in columns]
             for i, name in enumerate(names)]

    outpath = os.path.join(outdir, '[ちょー助]{0}.csv'.format(title))
    # NOTE(review): the file is written with the platform default encoding,
    # as before - confirm whether a fixed encoding is wanted.
    with open(outpath, 'w') as f:
        # csv.writer quotes fields containing commas or quotes; the '\n'
        # terminator keeps the line endings the script has always produced.
        writer = csv.writer(f, lineterminator='\n')
        for row in rows:
            print(','.join(row))
            writer.writerow(row)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment