# macoril/race_entry.py

## race_entry.py
import csv
import requests
from bs4 import BeautifulSoup

def main():
    """Scrape upcoming race entries for a fixed list of stallions from JBIS.

    For every stallion the sire-entry page is downloaded and parsed. The page
    heading with its URL, each ``<h2>`` title and every table row are written
    to ``race_entry_list.csv`` (UTF-8). The same data is echoed to stdout,
    tab-separated, with a few columns padded by display width so that the
    output lines up.

    A stallion whose page cannot be fetched is reported on stderr and skipped
    so that the remaining stallions are still processed.
    """
    import sys

    stallions = [
        {'name': 'グラスワンダー', 'id': '0000299089'},
        {'name': 'スクリーンヒーロー', 'id': '0000801447'},
        {'name': 'アーネストリー', 'id': '0000885246'},
        {'name': 'モーリス', 'id': '0001138240'},
        {'name': 'ミキノバンジョー', 'id': '0001041054'},
        {'name': 'ゴールドアクター', 'id': '0001136428'},
        {'name': 'クラウンレガーロ', 'id': '0001124234'},
        {'name': 'グァンチャーレ', 'id': '0001153071'},
    ]

    # Console-only padding: column index -> target display width.
    pad_widths = {0: 6, 2: 28, 5: 8}

    # newline='' is what the csv module expects for files it writes; without
    # it the writer's '\r\n' terminators get translated a second time on
    # Windows, producing an empty line after every row.
    with open("race_entry_list.csv", "w", encoding='utf-8', newline='') as file:
        writer = csv.writer(file)
        for stallion in stallions:
            load_url = 'https://www.jbis.or.jp/horse/{}/sire/entry/'.format(stallion['id'])
            target = 'name: {} id: {}'.format(stallion['name'], stallion['id'])

            try:
                # A timeout keeps one unresponsive request from hanging the run.
                html = requests.get(load_url, timeout=30)
                # Do not scrape an HTTP error page as if it were real data.
                html.raise_for_status()
            except requests.RequestException as exc:
                print('{} skipped: {}'.format(target, exc), file=sys.stderr)
                continue
            bs = BeautifulSoup(html.content, "html.parser")

            # Fall back to the configured name when the page has no <h1>.
            heading = bs.find("h1")
            page_name = heading.text if heading is not None else stallion['name']
            result = 'name: {} url: {}'.format(page_name, load_url)
            writer.writerow([result])
            print(target)
            print(result)

            # Collect the progeny's upcoming entries: each <h2> is paired
            # positionally with the table at the same index.
            for title, table in zip(bs.find_all("h2"), bs.find_all("table")):
                # Section title (race day).
                writer.writerow([title.text])
                print(title.text)

                # Entry details, one table row at a time.
                for row in table.find_all("tr"):
                    columns = [v.text for v in row.find_all(['td', 'th'])]
                    # The CSV gets the raw cell text; padding is console-only.
                    writer.writerow(columns)

                    # Pad by display width so full-width text stays aligned;
                    # rows with fewer cells are padded only where cells exist.
                    for index, width in pad_widths.items():
                        if index < len(columns):
                            columns[index] = mb_ljust(columns[index].strip(), width)
                    print('\t'.join(columns))
                print('')
            print('')

import unicodedata
def mb2_len_count(string):
    """Return the display width of *string*.

    A character whose East Asian Width class is Fullwidth (F), Wide (W) or
    Ambiguous (A) counts as 2; every other character counts as 1.
    """
    double_width_classes = ('F', 'W', 'A')
    return sum(
        2 if unicodedata.east_asian_width(ch) in double_width_classes else 1
        for ch in string
    )

def mb_ljust(string, width, padding=' '):
    """Left-justify *string* to *width* display columns.

    Width is measured with ``mb2_len_count`` rather than ``len``.

    Args:
        string: Text to pad on the right.
        width: Target display width.
        padding: Fill text appended to the right (a single space by default).

    Returns:
        *string* followed by as many copies of *padding* as fit in the
        remaining width. *string* is returned unchanged when it is already
        at least *width* wide or when *padding* is empty. With a wide
        *padding* and an odd remainder the result is one column short
        rather than one column over.
    """
    remaining = width - mb2_len_count(string)
    # Measure the fill as well: a full-width pad character occupies two
    # columns, so repeating it once per missing column would overshoot.
    pad_width = mb2_len_count(padding)
    if remaining <= 0 or pad_width == 0:
        return string
    return string + padding * (remaining // pad_width)

# Script entry point: the scrape starts as soon as this line is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also triggers the run.
main()
	import csv
	import requests
	from bs4 import BeautifulSoup

	def main():
	    """Scrape upcoming race entries for a fixed list of stallions from JBIS.

	    For every stallion the sire-entry page is downloaded and parsed. The
	    page heading with its URL, each ``<h2>`` title and every table row are
	    written to ``race_entry_list.csv`` (UTF-8). The same data is echoed to
	    stdout, tab-separated, with a few columns padded by display width.

	    A stallion whose page cannot be fetched is reported on stderr and
	    skipped so that the remaining stallions are still processed.
	    """
	    import sys

	    stallions = [
	        {'name': 'グラスワンダー', 'id': '0000299089'},
	        {'name': 'スクリーンヒーロー', 'id': '0000801447'},
	        {'name': 'アーネストリー', 'id': '0000885246'},
	        {'name': 'モーリス', 'id': '0001138240'},
	        {'name': 'ミキノバンジョー', 'id': '0001041054'},
	        {'name': 'ゴールドアクター', 'id': '0001136428'},
	        {'name': 'クラウンレガーロ', 'id': '0001124234'},
	        {'name': 'グァンチャーレ', 'id': '0001153071'},
	    ]

	    # Console-only padding: column index -> target display width.
	    pad_widths = {0: 6, 2: 28, 5: 8}

	    # newline='' is what the csv module expects for files it writes;
	    # without it every row is followed by an empty line on Windows.
	    with open("race_entry_list.csv", "w", encoding='utf-8', newline='') as file:
	        writer = csv.writer(file)
	        for stallion in stallions:
	            load_url = 'https://www.jbis.or.jp/horse/{}/sire/entry/'.format(stallion['id'])
	            target = 'name: {} id: {}'.format(stallion['name'], stallion['id'])

	            try:
	                # A timeout keeps one unresponsive request from hanging the run.
	                html = requests.get(load_url, timeout=30)
	                # Do not scrape an HTTP error page as if it were real data.
	                html.raise_for_status()
	            except requests.RequestException as exc:
	                print('{} skipped: {}'.format(target, exc), file=sys.stderr)
	                continue
	            bs = BeautifulSoup(html.content, "html.parser")

	            # Fall back to the configured name when the page has no <h1>.
	            heading = bs.find("h1")
	            page_name = heading.text if heading is not None else stallion['name']
	            result = 'name: {} url: {}'.format(page_name, load_url)
	            writer.writerow([result])
	            print(target)
	            print(result)

	            # Collect the progeny's upcoming entries: each <h2> is paired
	            # positionally with the table at the same index.
	            for title, table in zip(bs.find_all("h2"), bs.find_all("table")):
	                # Section title (race day).
	                writer.writerow([title.text])
	                print(title.text)

	                # Entry details, one table row at a time.
	                for row in table.find_all("tr"):
	                    columns = [v.text for v in row.find_all(['td', 'th'])]
	                    # The CSV gets the raw cell text; padding is console-only.
	                    writer.writerow(columns)

	                    # Pad by display width so full-width text stays aligned;
	                    # short rows are padded only where cells exist.
	                    for index, width in pad_widths.items():
	                        if index < len(columns):
	                            columns[index] = mb_ljust(columns[index].strip(), width)
	                    print('\t'.join(columns))
	                print('')
	            print('')

	import unicodedata
	def mb2_len_count(string):
	    """Return the display width of *string*.

	    A character whose East Asian Width class is Fullwidth (F), Wide (W)
	    or Ambiguous (A) counts as 2; every other character counts as 1.
	    """
	    count = 0
	    for c in string:
	        if unicodedata.east_asian_width(c) in 'FWA':
	            count += 2
	        else:
	            count += 1
	    return count

	def mb_ljust(string, width, padding=' '):
	    """Left-justify *string* to *width* display columns.

	    Width is measured with ``mb2_len_count`` rather than ``len``.

	    Args:
	        string: Text to pad on the right.
	        width: Target display width.
	        padding: Fill text appended to the right (a single space by default).

	    Returns:
	        *string* followed by as many copies of *padding* as fit in the
	        remaining width; *string* unchanged when it is already at least
	        *width* wide or when *padding* is empty.
	    """
	    remaining = width - mb2_len_count(string)
	    # Measure the fill as well: a full-width pad character occupies two
	    # columns, so repeating it once per missing column would overshoot.
	    pad_width = mb2_len_count(padding)
	    if remaining <= 0 or pad_width == 0:
	        return string
	    return string + padding * (remaining // pad_width)

	# Script entry point: the scrape starts as soon as this line is executed.
	# NOTE(review): there is no `if __name__ == "__main__":` guard here.
	main()