Skip to content

Instantly share code, notes, and snippets.

@cmgchess
Last active July 13, 2024 13:26
Show Gist options
  • Save cmgchess/1c18df5801184a54355b1bdeb0dceac4 to your computer and use it in GitHub Desktop.
Save cmgchess/1c18df5801184a54355b1bdeb0dceac4 to your computer and use it in GitHub Desktop.
chesscom_tournament_pgn.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"source": [
"## Resources: chess.com TCN move-list format\n",
"\n",
"\n",
"* https://www.chess.com/clubs/forum/view/official-chess-com-movelist-pgn-help\n",
"\n",
"* https://www.chess.com/clubs/forum/view/move-list-format-when-viewing-my-game-via-callback\n",
"\n",
"\n",
"* https://github.com/savi2w/chess-tcn\n",
"* https://raw.githubusercontent.com/AnishN/bugaboo/master/tcn_parser.py\n",
"\n",
"\n",
"\n",
"\n",
"\n"
],
"metadata": {
"id": "o6nbrJW0Qbxt"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Y65sZZZc_ax0",
"outputId": "12526741-7267-43ee-ed77-cee55733c214"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting chess\n",
" Downloading chess-1.10.0-py3-none-any.whl (154 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.4/154.4 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
}
],
"source": [
"# %pip installs into the running kernel's environment; the version is pinned to the one this notebook was last run with.\n",
"%pip install chess==1.10.0\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "SxQ2S2VMEqT6"
},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import json\n",
"import os\n",
"from datetime import datetime\n",
"import re\n",
"import math\n",
"import chess\n",
"import chess.pgn"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YCNTxF5hIiyz"
},
"outputs": [],
"source": [
"# Lowercase piece letter ('p', 'n', 'b', 'r', 'q', 'k') -> python-chess piece type.\n",
"piece_map = {chess.piece_symbol(piece_type): piece_type for piece_type in chess.PIECE_TYPES}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "z96hCIW09bIi"
},
"outputs": [],
"source": [
"def chunk_string(s, length):\n",
"    \"\"\"Split ``s`` into consecutive pieces of at most ``length`` characters.\n",
"\n",
"    Every character is kept (newlines included). The last piece is shorter\n",
"    when ``len(s)`` is not a multiple of ``length``; an empty string gives [].\n",
"\n",
"    Raises:\n",
"        ValueError: if ``length`` is smaller than 1.\n",
"    \"\"\"\n",
"    if length < 1:\n",
"        raise ValueError(\"length must be a positive integer\")\n",
"    # Plain slicing, unlike a '.'-based regex, does not skip newline characters.\n",
"    return [s[start:start + length] for start in range(0, len(s), length)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "MUHdJTrn9Jwj"
},
"outputs": [],
"source": [
"# Port of the chess-tcn npm library that chess.com uses.\n",
"# decode_tcn turns a TCN-encoded moveList string back into a list of moves.\n",
"\n",
"def decode_tcn(n):\n",
"    \"\"\"Decode a TCN move string into a list of move dictionaries.\n",
"\n",
"    Each move takes two characters. The first character gives the origin\n",
"    square or, for codes above 75, the piece being dropped. The second gives\n",
"    the target square or, for codes above 63, a promotion piece combined with\n",
"    a file shift of -1, 0 or +1.\n",
"\n",
"    Args:\n",
"        n: TCN string; its length must be even.\n",
"\n",
"    Returns:\n",
"        One dict per move with the keys \"from\", \"to\", \"drop\" and \"promotion\".\n",
"        \"from\" and \"to\" are square names such as \"e2\"; \"drop\" and \"promotion\"\n",
"        are lowercase piece letters. Entries that do not apply are None.\n",
"\n",
"    Raises:\n",
"        ValueError: if ``n`` has odd length, contains a character outside the\n",
"            TCN alphabet, or uses a piece code that matches no piece.\n",
"    \"\"\"\n",
"    tcn_chars = \"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?{~}(^)[_]@#$,./&-*++=\"\n",
"    piece_chars = \"qnrbkp\"\n",
"    if len(n) % 2:\n",
"        raise ValueError(\"TCN string must contain two characters per move\")\n",
"    moves = []\n",
"    for i in range(0, len(n), 2):\n",
"        move = {\n",
"            \"from\": None,\n",
"            \"to\": None,\n",
"            \"drop\": None,\n",
"            \"promotion\": None,\n",
"        }\n",
"        origin = tcn_chars.find(n[i])\n",
"        target = tcn_chars.find(n[i + 1])\n",
"        if origin < 0 or target < 0:\n",
"            raise ValueError(\"invalid TCN move: \" + repr(n[i:i + 2]))\n",
"        if target > 63:\n",
"            promo_idx = (target - 64) // 3\n",
"            if promo_idx >= len(piece_chars):\n",
"                raise ValueError(\"invalid TCN promotion code: \" + repr(n[i + 1]))\n",
"            move[\"promotion\"] = piece_chars[promo_idx]\n",
"            # The pawn moves one rank (down when it starts on ranks 1-2, up otherwise);\n",
"            # (target - 1) % 3 - 1 is the file shift.\n",
"            target = origin + (-8 if origin < 16 else 8) + ((target - 1) % 3) - 1\n",
"        if origin > 75:\n",
"            drop_idx = origin - 79\n",
"            # Reject codes 76-78 explicitly: a negative index would silently pick a wrong piece.\n",
"            if not 0 <= drop_idx < len(piece_chars):\n",
"                raise ValueError(\"invalid TCN drop code: \" + repr(n[i]))\n",
"            move[\"drop\"] = piece_chars[drop_idx]\n",
"        else:\n",
"            move[\"from\"] = tcn_chars[origin % 8] + str(origin // 8 + 1)\n",
"        move[\"to\"] = tcn_chars[target % 8] + str(target // 8 + 1)\n",
"        moves.append(move)\n",
"    return moves"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iSRDmG7iGau0"
},
"outputs": [],
"source": [
"def generate_pgn(uci, headers):\n",
"    \"\"\"Build a ``chess.pgn.Game`` from decoded moves and PGN headers.\n",
"\n",
"    Args:\n",
"        uci: iterable of move dicts with the keys \"from\", \"to\", \"drop\" and\n",
"            \"promotion\" (the shape returned by ``decode_tcn``).\n",
"        headers: mapping of PGN header names to values; every value is\n",
"            stored as ``str(value)``.\n",
"\n",
"    Returns:\n",
"        The game, with the moves appended one after another as a single line.\n",
"    \"\"\"\n",
"    game = chess.pgn.Game()\n",
"    for key, value in headers.items():\n",
"        game.headers[key] = str(value)\n",
"    node = game\n",
"    for entry in uci:\n",
"        drop = entry.get(\"drop\")\n",
"        promotion = entry.get(\"promotion\")\n",
"        to_square = chess.parse_square(entry[\"to\"])\n",
"        if drop is not None:\n",
"            # A drop has no origin square; python-chess encodes it with from_square == to_square.\n",
"            from_square = to_square\n",
"        else:\n",
"            from_square = chess.parse_square(entry[\"from\"])\n",
"        move = chess.Move(\n",
"            from_square=from_square,\n",
"            to_square=to_square,\n",
"            promotion=piece_map[promotion] if promotion is not None else None,\n",
"            drop=piece_map[drop] if drop is not None else None,\n",
"        )\n",
"        # The node starts out as the game itself, so the first move needs no special case.\n",
"        node = node.add_variation(move)\n",
"\n",
"    return game"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Z4HCaqGOBh3K"
},
"outputs": [],
"source": [
"# Tournament page to scrape, and the URL prefix a game id is appended to when a game is downloaded.\n",
"tourn_url = \"https://www.chess.com/tournament/live/early-titled-tuesday-blitz-july-09-2024-4882427\"\n",
"game_base_url = \"https://www.chess.com/callback/live/game/\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "VFmtXn0Emwtl"
},
"outputs": [],
"source": [
"# Last path segment of the tournament URL, without any query string or trailing slash.\n",
"tourn_name = tourn_url.split(\"?\")[0].rstrip(\"/\").split(\"/\")[-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WNycf2dH1zLb"
},
"outputs": [],
"source": [
"# White player's name (the avatar link's title) -> set of game ids collected from the pairings pages.\n",
"player_map = {}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gLEwVwz_x9-5"
},
"outputs": [],
"source": [
"# Download the tournament page; stop on an HTTP error instead of parsing an error page,\n",
"# and use a timeout so a stalled connection cannot hang the notebook.\n",
"r = requests.get(tourn_url, timeout=30)\n",
"r.raise_for_status()\n",
"soup = BeautifulSoup(r.content, 'html.parser')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KONBJTLXnm58"
},
"outputs": [],
"source": [
"# Read the tournament date from the third <span> inside the stats panel.\n",
"# NOTE(review): the strptime pattern expects text shaped like 'Jul 09, 2024, 03:00 PM' - confirm against the live page.\n",
"stats_el = soup.find('div', class_='tournaments-live-view-content-stats')\n",
"span_elements = stats_el.find_all('span')\n",
"date_and_time = span_elements[2].get_text(strip=True)\n",
"parsed_date = datetime.strptime(date_and_time, \"%b %d, %Y, %I:%M %p\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "s0tLbFSx6C8r"
},
"outputs": [],
"source": [
"# Text of the page's 'v5-title-label' <h1>, with surrounding whitespace removed.\n",
"name_el = soup.find('h1', class_='v5-title-label')\n",
"name = name_el.get_text().strip()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GVzf-pDlnseA"
},
"outputs": [],
"source": [
"# Calendar year and month of the parsed tournament date.\n",
"year = parsed_date.year\n",
"month = parsed_date.month"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LCVF4bIByNj6"
},
"outputs": [],
"source": [
"# Number of rounds, read from the data-rounds attribute of the first 'v5-section' div.\n",
"# Stays 0 when the div or the attribute is missing.\n",
"num_rnds_div = soup.find('div', class_ = 'v5-section')\n",
"num_rnds = 0\n",
"if num_rnds_div is not None:\n",
"    num_rnds = int(num_rnds_div.get('data-rounds', 0))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "hjTAFmaKznPG"
},
"outputs": [],
"source": [
"# Visit every round and every pairings page, filing each game id under its white player.\n",
"for rnd in range(1, num_rnds + 1):\n",
"    page = 1\n",
"    total_pages = 1\n",
"    while page <= total_pages:\n",
"        pairing_url = tourn_url+\"?round=\"+str(rnd)+\"&pairings=\"+str(page)\n",
"        pairing_r = requests.get(pairing_url, timeout=30)\n",
"        pairing_r.raise_for_status()\n",
"        pairing_soup = BeautifulSoup(pairing_r.content, 'html.parser')\n",
"        if page == 1:\n",
"            # Page 1 also carries the page count, so it is downloaded only once per round.\n",
"            pagination = pairing_soup.find('div', {'id': 'pairings-pagination-bottom'})\n",
"            if pagination is not None:\n",
"                total_pages = int(pagination.get('data-total-pages', 1))\n",
"        page += 1\n",
"        table = pairing_soup.find('table', class_ = 'table-component table-hover tournaments-live-view-pairings-table')\n",
"        if table is None:\n",
"            # No pairings table on this page: nothing to collect.\n",
"            continue\n",
"        # The first row is skipped (presumably the column header row).\n",
"        for row in table.find_all('tr')[1:]:\n",
"            a = row.find('a', class_='tournaments-live-view-background-link')\n",
"            players = row.find_all('div', class_='tournaments-live-view-pairings-user')\n",
"            if a is None or not players:\n",
"                # Without a game link or a player there is no game to download for this row.\n",
"                continue\n",
"            avatar = players[0].find('a', class_='tournaments-live-view-player-avatar')\n",
"            white = avatar.get('title','') if avatar is not None else ''\n",
"            # The game id is the last path segment of the link.\n",
"            g_id = a.get('href','').split(\"/\")[-1]\n",
"            if g_id:\n",
"                player_map.setdefault(white, set()).add(g_id)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "HgRBWvzoMj3e"
},
"outputs": [],
"source": [
"# Iterating a dict yields its keys, i.e. the white players' names.\n",
"player_list = list(player_map)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "L6yIjTCKZJHS"
},
"outputs": [],
"source": [
"# Number of distinct white players found on the pairings pages.\n",
"print(\"No of players:\",len(player_list))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "69NebZe_ZtUn"
},
"outputs": [],
"source": [
"# Add up the sizes of all per-player game id sets.\n",
"player_map_values = player_map.values()\n",
"total_gms = sum(map(len, player_map_values))\n",
"print(\"Total no of game ids:\",total_gms)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "s7twkwJcMwSM"
},
"outputs": [],
"source": [
"# Games are appended here as they are downloaded and rebuilt.\n",
"games = []"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "18YO5apyNi3A"
},
"outputs": [],
"source": [
"# Download every collected game, decode its TCN move list and rebuild it as a PGN game object.\n",
"for idx,player in enumerate(player_list):\n",
"    if idx%50 == 0:\n",
"        print(\"Processing player\",idx)\n",
"    # Sets have no stable iteration order; sorting makes repeated runs emit games in the same order.\n",
"    game_ids = sorted(player_map[player])\n",
"    for g in game_ids:\n",
"        # Time out rather than hang, and stop on an HTTP error instead of parsing an error body.\n",
"        game_data = requests.get(game_base_url+str(g), timeout=30)\n",
"        game_data.raise_for_status()\n",
"        game_json = game_data.json()\n",
"        move_list = game_json['game']['moveList']\n",
"        headers = game_json['game']['pgnHeaders']\n",
"        # Replace the Event header with the tournament title taken from the page.\n",
"        headers['Event'] = name\n",
"        # Two characters per move: split first, then decode each pair on its own.\n",
"        chunked_move_list = chunk_string(move_list, 2)\n",
"        decoded_move_list = [decode_tcn(move)[0] for move in chunked_move_list]\n",
"        pgn = generate_pgn(decoded_move_list, headers)\n",
"        games.append(pgn)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4nH44Az9TSSG"
},
"outputs": [],
"source": [
"# Number of games that were downloaded and rebuilt.\n",
"print(\"In games string:\",len(games))"
]
},
{
"cell_type": "code",
"source": [
"# Swap every game object for its string form.\n",
"games = list(map(str, games))"
],
"metadata": {
"id": "Z1XnBCC5NSz7"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8VubtjDHXARk"
},
"outputs": [],
"source": [
"# Join all game strings into one text, with two newlines between consecutive games.\n",
"game_str = \"\\n\\n\".join(games)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yevPITbOXN49"
},
"outputs": [],
"source": [
"# The output file is named after tourn_name and written relative to the working directory.\n",
"file_path = tourn_name+\".pgn\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kQIwDcXNXZAj"
},
"outputs": [],
"source": [
"# Write as UTF-8 explicitly so non-ASCII header text does not depend on the platform's default encoding.\n",
"with open(file_path, \"w\", encoding=\"utf-8\") as pgn_file:\n",
"    pgn_file.write(game_str)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment