Skip to content

Instantly share code, notes, and snippets.

@t-kurimura
Last active March 14, 2020 16:08
Show Gist options
  • Save t-kurimura/c1c0bfca01769991f58530d90b295dfb to your computer and use it in GitHub Desktop.
フットサルの大会見つけるやつ
import re
import datetime
import locale
import urllib3
import urllib.parse
from bs4 import BeautifulSoup
def safe_cast(val, to_type, default=None):
    """Convert *val* via *to_type*, falling back to *default* on failure.

    Only ValueError and TypeError count as conversion failures; any other
    exception propagates to the caller.
    """
    try:
        result = to_type(val)
    except (ValueError, TypeError):
        result = default
    return result
def extract_place(title):
    """Return the first known Tokyo area name contained in *title*, else None.

    A plain substring test replaces the original ``re.match(r'.*key.*')``:
    that regex silently failed whenever the keyword appeared after a newline
    in the scraped text, because ``.`` does not match newlines and ``match``
    anchors at the start of the string.
    """
    # Checked in order; the first keyword found wins.
    keywords = ("原宿", "渋谷", "池袋", "新宿")
    for key in keywords:
        if key in title:
            return key
    return None
# Japanese time locale so strptime/strftime handle the Japanese weekday
# abbreviations ("月", "火", ...) that appear in the scraped dates via %a.
# NOTE(review): raises locale.Error if ja_JP.UTF-8 is not installed — confirm
# the deployment environment provides it.
locale.setlocale(locale.LC_TIME, 'ja_JP.UTF-8')
now = datetime.datetime.now()
# Search window starts today; format matches the site's hold_on_from parameter.
search_date_str = now.strftime("%Y-%m-%d")
def get_list(base_url):
    """Scrape up to 10 result pages under *base_url* on labola.jp.

    Returns a list of dicts with keys ``start_datetime``, ``end_datetime``,
    ``rest`` (remaining slots), ``url`` and ``place`` for events that still
    have open slots and mention a known place keyword.
    """
    outputs = []
    # One connection pool reused for every request (was re-created per page).
    http = urllib3.PoolManager()
    for page_num in range(10):
        url = (base_url + "/?page=" + str(page_num + 1)
               + "&hold_on_from=" + search_date_str + "&embed=normal")
        r = http.request('GET', url)
        soup = BeautifulSoup(r.data, "html.parser")
        ul = soup.select_one('#event_list')
        if ul is None:
            # No event list on this page (e.g. past the last page) — skip it;
            # previously this crashed with AttributeError on .findAll.
            continue
        children = ul.findAll("li")
        # NOTE(review): the last <li> is deliberately skipped (len - 1) —
        # presumably a pagination/footer item; confirm against the live page.
        for num in range(len(children) - 1):
            body = soup.select_one("#event_list > li:nth-child(" + str(num + 1) + ")")
            try:
                date_str = body.select_one("div.date").text
            except AttributeError:
                # body or its div.date is missing -> not an event entry.
                continue
            title = body.select_one("h2").text
            place = extract_place(title + date_str)
            # Capacity text looks like "(残り N)"; strip everything but digits.
            rest_raw_str = body.select_one("div.capacity > span").text
            rest_num_str = re.sub(r'\(|\)|(残り)|\s', "", rest_raw_str)
            rest_num = safe_cast(rest_num_str, int)
            if rest_num is not None and rest_num > 0 and place is not None:
                # date_str format assumed "YYYY/MM/DD(曜)HH:MM~HH:MM | ..." —
                # fixed-offset slicing below depends on it.
                datetime_str = re.sub(r'\|.*$', "", date_str)
                date = datetime_str[:13]
                start_time = datetime_str[13:18]
                end_time = datetime_str[19:]
                start_datetime = datetime.datetime.strptime(
                    '{} {}'.format(date, start_time), "%Y/%m/%d(%a) %H:%M")
                end_datetime = datetime.datetime.strptime(
                    '{} {}'.format(date, end_time), "%Y/%m/%d(%a) %H:%M")
                link_raw = body.select_one("a")["href"]
                # Normalize the link to a path, then rebuild an absolute URL.
                link = re.sub(r'\/$|\/\?.*$|(https://labola.jp)', "", link_raw)
                formed_url = "https://labola.jp" + link
                outputs.append({
                    "start_datetime": start_datetime,
                    "end_datetime": end_datetime,
                    "rest": rest_num,
                    "url": formed_url,
                    "place": place,
                })
    return outputs
# 大会 (tournaments)
total_list = get_list("https://labola.jp/reserve/events/search/tournament/area-13")
total_list.extend(get_list("https://labola.jp/r/event/3010"))
# 個サル (individual futsal)
# BUG FIX: this previously reassigned total_list with "=", silently discarding
# the tournament results collected above; extend merges all three sources.
total_list.extend(get_list("https://labola.jp/reserve/events/search/personal/area-13"))

# One display line per event: "MM/DD (曜) HH:MM~ Nh @place (残N) url".
# NOTE(review): .seconds assumes an event never spans a day boundary — confirm.
format_list = [
    "{} {} {}h @{} (残{}) {}".format(
        event['start_datetime'].strftime("%m/%d (%a)"),
        event['start_datetime'].strftime("%H:%M~"),
        (event['end_datetime'] - event['start_datetime']).seconds / 60 / 60,
        event['place'],
        event['rest'],
        event['url'],
    )
    for event in total_list
]
# De-duplicate and print in chronological/lexical order.
for line in sorted(set(format_list)):
    print(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment