Last active
July 13, 2024 13:26
-
-
Save cmgchess/1c18df5801184a54355b1bdeb0dceac4 to your computer and use it in GitHub Desktop.
chesscom_tournament_pgn.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Resources\n", | |
"\n", | |
"\n", | |
"* https://www.chess.com/clubs/forum/view/official-chess-com-movelist-pgn-help\n", | |
"\n", | |
"* https://www.chess.com/clubs/forum/view/move-list-format-when-viewing-my-game-via-callback\n", | |
"\n", | |
"\n", | |
"* https://github.com/savi2w/chess-tcn\n", | |
"* https://raw.githubusercontent.com/AnishN/bugaboo/master/tcn_parser.py\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n" | |
], | |
"metadata": { | |
"id": "o6nbrJW0Qbxt" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Y65sZZZc_ax0", | |
"outputId": "12526741-7267-43ee-ed77-cee55733c214" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Collecting chess\n", | |
" Downloading chess-1.10.0-py3-none-any.whl (154 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.4/154.4 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25h" | |
] | |
} | |
], | |
"source": [ | |
"!pip install chess\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "SxQ2S2VMEqT6" | |
}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"from bs4 import BeautifulSoup\n", | |
"import json\n", | |
"import os\n", | |
"from datetime import datetime\n", | |
"import re\n", | |
"import math\n", | |
"import chess\n", | |
"import chess.pgn" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "YCNTxF5hIiyz" | |
}, | |
"outputs": [], | |
"source": [ | |
"piece_map = {\n", | |
" 'p': chess.PAWN,\n", | |
" 'n': chess.KNIGHT,\n", | |
" 'b': chess.BISHOP,\n", | |
" 'r': chess.ROOK,\n", | |
" 'q': chess.QUEEN,\n", | |
" 'k': chess.KING\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "z96hCIW09bIi" | |
}, | |
"outputs": [], | |
"source": [ | |
"def chunk_string(s, length):\n", | |
" return re.findall('.{1,' + str(length) + '}', s)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "MUHdJTrn9Jwj" | |
}, | |
"outputs": [], | |
"source": [ | |
"#1:1 port of chess-tcn npm library that chess.com uses\n", | |
"#this is used to encode the moveList into a string\n", | |
"\n", | |
"def decode_tcn(n):\n", | |
" tcn_chars = \"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?{~}(^)[_]@#$,./&-*++=\"\n", | |
" piece_chars = \"qnrbkp\"\n", | |
" o = 0\n", | |
" s = 0\n", | |
" u = 0\n", | |
" w = len(n)\n", | |
" c = []\n", | |
" for i in range(0, w, 2):\n", | |
" u = {\n", | |
" \"from\": None,\n", | |
" \"to\": None,\n", | |
" \"drop\": None,\n", | |
" \"promotion\": None,\n", | |
" }\n", | |
" o = tcn_chars.index(n[i])\n", | |
" s = tcn_chars.index(n[i + 1])\n", | |
" if s > 63:\n", | |
" u[\"promotion\"] = piece_chars[math.floor((s - 64) / 3)]\n", | |
" s = o + (-8 if o < 16 else 8) + ((s - 1) % 3) - 1\n", | |
" if o > 75:\n", | |
" u[\"drop\"] = piece_chars[o - 79]\n", | |
" else:\n", | |
" u[\"from\"] = tcn_chars[o % 8] + str(math.floor(o / 8) + 1)\n", | |
" u[\"to\"] = tcn_chars[s % 8] + str(math.floor(s / 8) + 1)\n", | |
" c.append(u)\n", | |
" return c" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "iSRDmG7iGau0" | |
}, | |
"outputs": [], | |
"source": [ | |
"def generate_pgn(uci, headers):\n", | |
" game = chess.pgn.Game()\n", | |
" for h in headers:\n", | |
" game.headers[h] = str(headers[h])\n", | |
" node = game\n", | |
" for move_idx,i in enumerate(uci):\n", | |
" drop = i.get(\"drop\")\n", | |
" mapped_drop = piece_map[drop] if drop is not None else None\n", | |
" promotion = i.get(\"promotion\")\n", | |
" mapped_promotion = piece_map[promotion] if promotion is not None else None\n", | |
" move = chess.Move(from_square=chess.parse_square(i[\"from\"]),to_square=chess.parse_square(i[\"to\"]),drop=mapped_drop, promotion=mapped_promotion)\n", | |
" if (move_idx==0):\n", | |
" node = game.add_variation(chess.Move.from_uci(str(move)))\n", | |
" else:\n", | |
" node = node.add_variation(chess.Move.from_uci(str(move)))\n", | |
"\n", | |
" return game" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "Z4HCaqGOBh3K" | |
}, | |
"outputs": [], | |
"source": [ | |
"tourn_url = \"https://www.chess.com/tournament/live/early-titled-tuesday-blitz-july-09-2024-4882427\"\n", | |
"game_base_url = \"https://www.chess.com/callback/live/game/\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "VFmtXn0Emwtl" | |
}, | |
"outputs": [], | |
"source": [ | |
"tourn_name = tourn_url.split(\"live/\")[-1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "WNycf2dH1zLb" | |
}, | |
"outputs": [], | |
"source": [ | |
"player_map = {}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "gLEwVwz_x9-5" | |
}, | |
"outputs": [], | |
"source": [ | |
"r = requests.get(tourn_url)\n", | |
"soup = BeautifulSoup(r.content, 'html.parser')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "KONBJTLXnm58" | |
}, | |
"outputs": [], | |
"source": [ | |
"stats_el = soup.find('div', class_='tournaments-live-view-content-stats')\n", | |
"span_elements = stats_el.find_all('span')\n", | |
"date_and_time = span_elements[2].get_text(strip=True)\n", | |
"parsed_date = datetime.strptime(date_and_time, \"%b %d, %Y, %I:%M %p\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "s0tLbFSx6C8r" | |
}, | |
"outputs": [], | |
"source": [ | |
"name_el = soup.find('h1', class_='v5-title-label')\n", | |
"name = name_el.get_text().strip()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "GVzf-pDlnseA" | |
}, | |
"outputs": [], | |
"source": [ | |
"year = parsed_date.year\n", | |
"month = parsed_date.month" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "LCVF4bIByNj6" | |
}, | |
"outputs": [], | |
"source": [ | |
"num_rnds_div = soup.find('div', class_ = 'v5-section')\n", | |
"num_rnds = 0\n", | |
"if num_rnds_div:\n", | |
" num_rnds = int(num_rnds_div.get('data-rounds'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "hjTAFmaKznPG" | |
}, | |
"outputs": [], | |
"source": [ | |
"for i in range(num_rnds):\n", | |
" rnd = i + 1\n", | |
" rnd_url = tourn_url+\"?round=\"+str(rnd)+\"&pairings=1\"\n", | |
" rnd_r = requests.get(rnd_url)\n", | |
" rnd_soup = BeautifulSoup(rnd_r.content, 'html.parser')\n", | |
" pairing_dev = rnd_soup.find('div', {'id': 'pairings-pagination-bottom'})\n", | |
" data_total_pages_value = 1\n", | |
" if pairing_dev:\n", | |
" data_total_pages_value = int(pairing_dev.get('data-total-pages', 1))\n", | |
" for j in range(data_total_pages_value):\n", | |
" pairing = j + 1\n", | |
" pairing_url = tourn_url+\"?round=\"+str(rnd)+\"&pairings=\"+str(pairing)\n", | |
" pairing_r = requests.get(pairing_url)\n", | |
" pairing_soup = BeautifulSoup(pairing_r.content, 'html.parser')\n", | |
" table = pairing_soup.find('table', class_ = 'table-component table-hover tournaments-live-view-pairings-table')\n", | |
" table_rows = table.find_all('tr')\n", | |
" table_rows = table_rows[1:]\n", | |
" for row in table_rows:\n", | |
" a = row.find('a', class_='tournaments-live-view-background-link')\n", | |
" players = row.find_all('div', class_='tournaments-live-view-pairings-user')\n", | |
" white = players[0].find('a', class_='tournaments-live-view-player-avatar').get('title','')\n", | |
" if white not in player_map:\n", | |
" player_map[white] = set()\n", | |
" href = a.get('href','')\n", | |
" href = href.split(\"/\")\n", | |
" g_id = href[-1]\n", | |
" player_map[white].add(g_id)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "HgRBWvzoMj3e" | |
}, | |
"outputs": [], | |
"source": [ | |
"player_list = list(player_map.keys())\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "L6yIjTCKZJHS" | |
}, | |
"outputs": [], | |
"source": [ | |
"print(\"No of players:\",len(player_list))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "69NebZe_ZtUn" | |
}, | |
"outputs": [], | |
"source": [ | |
"player_map_values = player_map.values()\n", | |
"total_gms = sum(len(value) for value in player_map_values)\n", | |
"print(\"Total no of game ids:\",total_gms)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "s7twkwJcMwSM" | |
}, | |
"outputs": [], | |
"source": [ | |
"games = []" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "18YO5apyNi3A" | |
}, | |
"outputs": [], | |
"source": [ | |
"for idx,player in enumerate(player_list):\n", | |
" if idx%50 == 0:\n", | |
" print(\"Processing player\",idx)\n", | |
" game_ids = player_map[player]\n", | |
" for g in game_ids:\n", | |
" game_data = requests.get(game_base_url+str(g))\n", | |
" game_json = game_data.json()\n", | |
" move_list = game_json['game']['moveList']\n", | |
" headers = game_json['game']['pgnHeaders']\n", | |
" headers['Event'] = name\n", | |
" chunked_move_list = chunk_string(move_list, 2)\n", | |
" decoded_move_list = [decode_tcn(move)[0] for move in chunked_move_list]\n", | |
" pgn = generate_pgn(decoded_move_list, headers)\n", | |
" games.append(pgn)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "4nH44Az9TSSG" | |
}, | |
"outputs": [], | |
"source": [ | |
"print(\"In games string:\",len(games))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"games = [str(game) for game in games]" | |
], | |
"metadata": { | |
"id": "Z1XnBCC5NSz7" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "8VubtjDHXARk" | |
}, | |
"outputs": [], | |
"source": [ | |
"game_str = \"\\n\\n\".join(games)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "yevPITbOXN49" | |
}, | |
"outputs": [], | |
"source": [ | |
"file_path = tourn_name+\".pgn\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "kQIwDcXNXZAj" | |
}, | |
"outputs": [], | |
"source": [ | |
"with open(file_path,\"w\") as pgn_file:\n", | |
" pgn_file.write(game_str)" | |
] | |
} | |
], | |
"metadata": { | |
"colab": { | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"name": "python3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment