Last active
March 14, 2020 16:08
-
-
Save t-kurimura/c1c0bfca01769991f58530d90b295dfb to your computer and use it in GitHub Desktop.
フットサルの大会見つけるやつ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import locale
import re
import urllib.parse

import urllib3
from bs4 import BeautifulSoup
def safe_cast(val, to_type, default=None):
    """Convert *val* with *to_type*, falling back to *default* on failure.

    Args:
        val: The value to convert.
        to_type: Callable that performs the conversion (for example ``int``).
        default: Value returned when the conversion fails.

    Returns:
        ``to_type(val)``, or *default* when that call raises ``ValueError``,
        ``TypeError`` or ``OverflowError``.
    """
    try:
        return to_type(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers conversions such as int(float("inf")), which
        # would otherwise escape a helper whose purpose is to never raise.
        return default
def extract_place(title):
    """Return the first known place name that occurs in *title*.

    The keywords are checked in their listed order, so when several of them
    occur the earliest one in the list wins.

    Args:
        title: Text to search (may span several lines).

    Returns:
        The matching keyword, or ``None`` when no keyword occurs in *title*.
    """
    keywords = ("原宿", "渋谷", "池袋", "新宿")
    for key in keywords:
        # A plain substring test finds the keyword anywhere in the text.
        # An anchored regex such as ".*key.*" stops at the first newline,
        # because "." does not match line breaks.
        if key in title:
            return key
    return None
# Switch the time locale to Japanese so that "%a" parses and prints Japanese
# weekday names; the strptime/strftime calls later in this script rely on it.
# Raises locale.Error when the ja_JP.UTF-8 locale is not installed.
locale.setlocale(locale.LC_TIME, 'ja_JP.UTF-8')
now = datetime.datetime.now()
# Today's date as YYYY-MM-DD; get_list() sends it as the "hold_on_from"
# query parameter.
search_date_str = now.strftime("%Y-%m-%d")
def _parse_event(body):
    """Convert one ``<li>`` of the event list into a result dict.

    Args:
        body: The BeautifulSoup tag of a single list item.

    Returns:
        A dict with the keys ``start_datetime``, ``end_datetime``, ``rest``,
        ``url`` and ``place``, or ``None`` when the item lacks the expected
        markup, mentions no known place, or has no remaining capacity.

    Raises:
        ValueError: If the date text does not match the expected layout
            (``strptime`` fails).
    """
    date_node = body.select_one("div.date")
    title_node = body.select_one("h2")
    rest_node = body.select_one("div.capacity > span")
    if date_node is None or title_node is None or rest_node is None:
        # Not an event entry (or unexpected markup): nothing to extract.
        return None

    date_str = date_node.text
    place = extract_place(title_node.text + date_str)
    # Strip parentheses, the "残り" label and whitespace, leaving the number.
    rest_num_str = re.sub(r'\(|\)|(残り)|\s', "", rest_node.text)
    rest_num = safe_cast(rest_num_str, int)
    if rest_num is None or rest_num <= 0 or place is None:
        return None

    link_node = body.select_one("a")
    link_raw = link_node.get("href") if link_node is not None else None
    if not link_raw:
        return None

    # Keep only the part before the first "|", then cut fixed-width fields.
    # NOTE(review): the offsets imply a layout like
    # "YYYY/MM/DD(w)HH:MM?HH:MM" with a one-character weekday and a
    # one-character separator between the times -- confirm against the site.
    datetime_str = re.sub(r'\|.*$', "", date_str)
    date = datetime_str[:13]
    start_time = datetime_str[13:18]
    end_time = datetime_str[19:]
    # "%a" is locale dependent; it is matched using the current LC_TIME.
    stamp_format = "%Y/%m/%d(%a) %H:%M"
    start_datetime = datetime.datetime.strptime(
        '{} {}'.format(date, start_time), stamp_format)
    end_datetime = datetime.datetime.strptime(
        '{} {}'.format(date, end_time), stamp_format)

    # Reduce the link to its path (no host, trailing slash or query string)
    # and rebuild an absolute URL from it.
    link = re.sub(r'\/$|\/\?.*$|(https://labola.jp)', "", link_raw)
    return {
        "start_datetime": start_datetime,
        "end_datetime": end_datetime,
        "rest": rest_num,
        "url": "https://labola.jp" + link,
        "place": place,
    }


def get_list(base_url):
    """Collect bookable events from the first ten result pages of *base_url*.

    Each page is requested with the module-level ``search_date_str`` as the
    ``hold_on_from`` parameter. Only events that mention a place known to
    ``extract_place`` and still have remaining capacity are returned.

    Args:
        base_url: Search URL without a trailing slash or query string.

    Returns:
        A list of dicts as produced by ``_parse_event``.
    """
    outputs = []
    # One pool for all pages so the connection to the host can be reused.
    http = urllib3.PoolManager()
    for page_num in range(1, 11):
        url = (
            f"{base_url}/?page={page_num}"
            f"&hold_on_from={search_date_str}&embed=normal"
        )
        r = http.request('GET', url)
        soup = BeautifulSoup(r.data, "html.parser")
        ul = soup.select_one('#event_list')
        if ul is None:
            # No result list on this page: stop paginating instead of
            # failing with an AttributeError.
            break
        # Every direct <li> is visited, including the last one; items that
        # are not events are filtered out by _parse_event.
        for body in ul.find_all("li", recursive=False):
            event = _parse_event(body)
            if event is not None:
                outputs.append(event)
    return outputs
# Tournaments
total_list = get_list("https://labola.jp/reserve/events/search/tournament/area-13")
total_list.extend(get_list("https://labola.jp/r/event/3010"))
# Individual-entry futsal ("kosaru"). Extend rather than reassign, so the
# tournament results collected above are kept.
total_list.extend(get_list("https://labola.jp/reserve/events/search/personal/area-13"))

# One display line per event: date, start time, duration in hours, place,
# remaining slots and URL.
format_list = [
    "{} {} {}h @{} (残{}) {}".format(
        x['start_datetime'].strftime("%m/%d (%a)"),
        x['start_datetime'].strftime("%H:%M~"),
        # Both datetimes share one calendar date, so an event ending after
        # midnight gives a negative difference. timedelta.seconds is always
        # in [0, 86400) and therefore still yields the real duration.
        (x['end_datetime'] - x['start_datetime']).seconds/60/60,
        x['place'],
        x['rest'],
        x['url'],
    )
    for x in total_list
]

# set() removes duplicate lines; sorting puts them in date/time text order.
for a in sorted(set(format_list)):
    print(a)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment