Last active
January 30, 2021 13:44
-
-
Save macoril/c3c76409962b6f2336fe8f62bb99c350 to your computer and use it in GitHub Desktop.
JBISからグラスワンダー系の出走予定を取得して出力(ターミナル上&csv)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import requests | |
from bs4 import BeautifulSoup | |
def main():
    """Fetch upcoming race entries for a fixed list of sires from JBIS.

    For every stallion in the hard-coded list, the sire "entry" page on
    www.jbis.or.jp is downloaded and parsed.  Each ``<h2>`` heading on the
    page is paired positionally with a ``<table>``; the heading text and all
    rows of the table are written to ``race_entry_list.csv`` (UTF-8) and
    echoed to the terminal, with a few columns padded to a fixed display
    width so the tab-separated output lines up.

    Takes no arguments and returns ``None``.  Network errors raised by
    ``requests`` (including timeouts) propagate to the caller.
    """
    stallions = [
        {'name': 'グラスワンダー', 'id': '0000299089'},
        {'name': 'スクリーンヒーロー', 'id': '0000801447'},
        {'name': 'アーネストリー', 'id': '0000885246'},
        {'name': 'モーリス', 'id': '0001138240'},
        {'name': 'ミキノバンジョー', 'id': '0001041054'},
        {'name': 'ゴールドアクター', 'id': '0001136428'},
        {'name': 'クラウンレガーロ', 'id': '0001124234'},
        {'name': 'グァンチャーレ', 'id': '0001153071'},
    ]
    # (column index, display width) pairs that are padded for terminal output
    # only; the CSV always receives the unpadded cell text.
    padded_columns = ((0, 6), (2, 28), (5, 8))
    # Seconds to wait for the server before giving up instead of hanging.
    request_timeout = 30

    # newline='' hands line-ending control to the csv module; without it every
    # row is followed by an extra blank line on Windows.
    with open("race_entry_list.csv", "w", encoding='utf-8', newline='') as file:
        writer = csv.writer(file)
        for stallion in stallions:
            load_url = 'https://www.jbis.or.jp/horse/{}/sire/entry/'.format(stallion['id'])
            html = requests.get(load_url, timeout=request_timeout)
            bs = BeautifulSoup(html.content, "html.parser")

            # Fall back to the configured name when the page has no <h1>
            # (e.g. an error page) rather than crashing on None.text.
            heading = bs.find("h1")
            page_name = heading.text if heading is not None else stallion['name']

            target = 'name: {} id: {}'.format(stallion['name'], stallion['id'])
            result = 'name: {} url: {}'.format(page_name, load_url)
            writer.writerow([result])
            print(target)
            print(result)

            # Collect the scheduled race entries of this sire's offspring.
            for title, table in zip(bs.find_all("h2"), bs.find_all("table")):
                # Heading of the section (the race meeting date).
                writer.writerow([title.text])
                print(title.text)

                # Entry details, one table row per line.
                for row in table.find_all("tr"):
                    columns = [v.text for v in row.find_all(['td', 'th'])]
                    writer.writerow(columns)

                    # Unpadded cells break the terminal layout, so pad the
                    # selected columns; rows with fewer cells are left as-is
                    # for the missing positions.
                    for index, width in padded_columns:
                        if index < len(columns):
                            columns[index] = mb_ljust(columns[index].strip(), width)
                    print('\t'.join(columns))
                print('')
            print('')
import unicodedata | |
def mb2_len_count(string):
    """Return the display width of *string* measured in terminal cells.

    Characters whose East Asian Width class is Fullwidth (``F``), Wide
    (``W``) or Ambiguous (``A``) count as two cells; every other character
    counts as one.
    """
    double_width_classes = ('F', 'W', 'A')
    return sum(
        2 if unicodedata.east_asian_width(char) in double_width_classes else 1
        for char in string
    )
def mb_ljust(string, width, padding=' '):
    """Left-justify *string* to *width* display cells.

    The current display width is measured with ``mb2_len_count`` and one
    copy of *padding* is appended for every missing cell.  A string that is
    already at least *width* cells wide is returned without padding.
    """
    missing_cells = width - mb2_len_count(string)
    # Never repeat the padding a negative number of times.
    fill = padding * (missing_cells if missing_cells > 0 else 0)
    return string + fill
# Script entry point: run the scrape as soon as the module is executed.
main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment