Last active
April 11, 2024 08:41
-
-
Save kun432/d29085ce93a0b4a2eb952dea626d172a to your computer and use it in GitHub Desktop.
netkeibaのレース結果から隊列をplotlyで可視化
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyOzfWbYtiFz6AKeeY+jcW/5", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/kun432/d29085ce93a0b4a2eb952dea626d172a/.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"zennの記事を元に以下を追加してみた。\n", | |
"\n", | |
"- netkeibaのレース結果をスクレイプして隊列情報を取得\n", | |
"- 各コーナーでの内外の位置取りを追加\n", | |
"- pandas 1.4.0で非推奨になり2.0で削除されたDataFrame.appendを、pd.concatに書き換え\n", | |
"- plotlyで隊列を可視化\n", | |
"\n", | |
"参考)pyparsingで競馬のコーナー通過順位をパース\n", | |
"https://zenn.dev/moripon/articles/ed5caa9c1d621e\n" | |
], | |
"metadata": { | |
"id": "qngtoogbjC2m" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install pandas\n", | |
"!pip install plotly" | |
], | |
"metadata": { | |
"id": "46uGuY2ObEWu" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "UCipf59VE4EB" | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import pyparsing as pp\n", | |
"import plotly.graph_objects as go\n", | |
"from plotly.subplots import make_subplots\n", | |
"import math\n", | |
"\n", | |
"# DataFrame列名定義\n", | |
"columns = ['diff', 'horse_no', 'side']\n", | |
"\n", | |
"# 差の定数(unit:馬身)\n", | |
"DIFF_GROUP = 0.3\n", | |
"DIFF_MIN = 1.5\n", | |
"DIFF_MID = 3.0\n", | |
"DIFF_MUCH = 6.0\n", | |
"\n", | |
"class ParsePass():\n", | |
" \n", | |
" def __init__(self):\n", | |
" \n", | |
" # 馬番\n", | |
" horse_no = pp.Word(pp.nums).setParseAction(self._horse_no_action)\n", | |
" \n", | |
" # 馬群\n", | |
" group = pp.Suppress(pp.Literal('(')) + \\\n", | |
" pp.Optional(pp.delimitedList(pp.Word(pp.nums), delim=',')) + \\\n", | |
" pp.Suppress(pp.Literal(')'))\n", | |
" group.ignore('*')\n", | |
" group.setParseAction(self._group_action)\n", | |
"\n", | |
" # 情報要素\n", | |
" element = (group | horse_no)\n", | |
" \n", | |
" # 前走馬との差\n", | |
" diff_min = pp.Suppress(pp.Optional(pp.Literal(','))).setParseAction(self._diff_min_action) + element\n", | |
" diff_mid = pp.Suppress(pp.Literal('-')).setParseAction(self._diff_mid_action) + element\n", | |
" diff_much = pp.Suppress(pp.Literal('=')).setParseAction(self._diff_much_action) + element\n", | |
"\n", | |
" # 全体定義\n", | |
" self._passing_order = element + pp.ZeroOrMore( diff_mid | diff_much | diff_min )\n", | |
" \n", | |
" def _horse_no_action(self, token):\n", | |
" \n", | |
" df_append = pd.DataFrame(data=[[self._diff, token[0], 1]], columns=columns)\n", | |
" self._data = pd.concat([self._data, df_append], ignore_index=True, axis=0).drop_duplicates().reset_index(drop=True)\n", | |
" return\n", | |
"\n", | |
" def _group_action(self, token):\n", | |
" \n", | |
" for i, no in enumerate(token):\n", | |
" df_append = pd.DataFrame(data=[[self._diff, no, 1+i]], columns=columns)\n", | |
" self._data = pd.concat([self._data, df_append], ignore_index=True, axis=0).drop_duplicates().reset_index(drop=True)\n", | |
" self._diff += DIFF_GROUP\n", | |
" self._diff -= DIFF_GROUP\n", | |
" return\n", | |
" \n", | |
" def _diff_min_action(self, token):\n", | |
" \n", | |
" self._diff += DIFF_MIN\n", | |
" return\n", | |
" \n", | |
" def _diff_mid_action(self, token):\n", | |
" \n", | |
" self._diff += DIFF_MID\n", | |
" return\n", | |
" \n", | |
" def _diff_much_action(self, token):\n", | |
" \n", | |
" self._diff += DIFF_MUCH\n", | |
" return\n", | |
" \n", | |
" def parse(self, pass_str):\n", | |
" \n", | |
" # 初期化\n", | |
" self._data = pd.DataFrame(columns=columns)\n", | |
" self._diff = 0\n", | |
" # parse\n", | |
" self._passing_order.parseString(pass_str)\n", | |
" # index調整\n", | |
" self._data.index = np.arange(1, len(self._data)+1)\n", | |
" self._data.index.name = 'rank'\n", | |
"\n", | |
" return(self._data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"netkeibaのレース結果のURLを右下のフォームに入力\n", | |
"\n", | |
"※db.netkeiba.comではなくてrace.netkeiba.comのほう\n" | |
], | |
"metadata": { | |
"id": "qoTCm3QDVZa5" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"url = \"https://race.netkeiba.com/race/result.html?race_id=202305010811\"#@param {type: \"string\"}" | |
], | |
"metadata": { | |
"id": "KndSiMxMkMCh" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"df_race = pd.read_html(url)[0]\n", | |
"df_race.drop(columns=[\"性齢\",\"斤量\",\"騎手\",\"タイム\",\"着差\",\"馬体重 (増減)\",\"厩舎\"], inplace=True)\n", | |
"\n", | |
"df_corner = pd.read_html(url)[3]\n", | |
"df_corner.rename(columns={0: \"corner\", 1: \"order\"}, inplace=True)\n", | |
"df_corner.dropna(inplace=True)\n", | |
"df_corner.reset_index(drop=True, inplace=True)\n", | |
"\n", | |
"pass_parsing = ParsePass()\n", | |
"\n", | |
"dfs = {}\n", | |
"for row in df_corner.itertuples():\n", | |
" df = pass_parsing.parse(row.order)\n", | |
" df[\"horse_no\"] = df[\"horse_no\"].astype(int)\n", | |
" df_merged = pd.merge(df, df_race, left_on='horse_no', right_on=\"馬 番\")\n", | |
" dfs[row.corner] = df_merged\n", | |
"\n", | |
"color_map = {1:\"snow\", 2:\"black\", 3:\"red\", 4:\"blue\",5:\"yellow\",6:\"green\",7:\"orange\",8:\"pink\"}\n", | |
"\n", | |
"fig = make_subplots(rows=1, cols=len(dfs), subplot_titles=list(dfs.keys()))\n", | |
"\n", | |
"diff_max=0\n", | |
"for i, k in enumerate(dfs):\n", | |
" df = dfs[k]\n", | |
" if df[\"diff\"].max() > diff_max:\n", | |
" diff_max=df[\"diff\"].max()\n", | |
"\n", | |
"diff_max = math.ceil(diff_max)\n", | |
"print(diff_max)\n", | |
"\n", | |
"for i, k in enumerate(dfs):\n", | |
" df = dfs[k]\n", | |
" colors = [color_map[val] for val in df[\"枠\"]]\n", | |
" fig.add_trace(\n", | |
" go.Scatter(x=df[\"side\"], y=df[\"diff\"],\n", | |
" mode = 'markers',\n", | |
" marker = dict(size=25, color=colors, line_width=3),\n", | |
" text=df[\"馬名\"],\n", | |
" showlegend=False,\n", | |
" hovertemplate=\"%{text} <br>\",\n", | |
"\n", | |
" ), row=1, col=i+1\n", | |
" )\n", | |
" fig.update_xaxes(title = '外 ←←←←← 内外 →→→→→ 内', range=[6,0], dtick=1, visible=True, showticklabels=True, zerolinecolor='LimeGreen', gridcolor='LimeGreen' ,row=1,col=i+1)\n", | |
" fig.update_yaxes(title = '後 →→→→→ 前後(馬身) →→→→→ 前', range=[diff_max + 1, -1], zerolinecolor='LightGreen', gridcolor='LightGreen', dtick=5,showticklabels=True, row=1,col=i+1)\n", | |
"\n", | |
"fig.update_layout(\n", | |
" title = 'コーナー通過順',\n", | |
" width=len(dfs) * 300, height=diff_max * 40,\n", | |
" plot_bgcolor=\"LimeGreen\",\n", | |
")\n", | |
"\n", | |
"print(df_corner)\n", | |
"fig.show()" | |
], | |
"metadata": { | |
"id": "yz4pqvI2Urau" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
コメント失礼します。
素人質問で恐縮なのですが、私がこのコードを実行すると"馬体重"や"馬番"の部分でエラーが出てしまうのですがkun432様の環境ではエラーなしで実行できているのでしょうか?