Skip to content

Instantly share code, notes, and snippets.

@cmgchess
Created December 3, 2023 10:11
Show Gist options
  • Save cmgchess/1c18df5801184a54355b1bdeb0dceac4 to your computer and use it in GitHub Desktop.
Save cmgchess/1c18df5801184a54355b1bdeb0dceac4 to your computer and use it in GitHub Desktop.
chesscom_tournament_pgn.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/cmgchess/1c18df5801184a54355b1bdeb0dceac4/chesscom_tournament_pgn.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "SL18T0Wr_IEY"
},
"outputs": [],
"source": [
"# `%pip` installs into the environment of the running kernel, which `!pip` does not guarantee.\n",
"# The PyPI package `chess.com` provides the `chessdotcom` module imported in the next cell.\n",
"%pip install chess.com"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "SxQ2S2VMEqT6"
},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import json\n",
"import os\n",
"from datetime import datetime\n",
"import re\n",
"from chessdotcom import Client, get_player_games_by_month_pgn"
]
},
{
"cell_type": "code",
"source": [
"# Tournament page to scrape. The part after \"live/\" becomes the output file name, and\n",
"# round/pairing pages are requested by appending query parameters to this URL.\n",
"tourn_url = \"https://www.chess.com/tournament/live/-monthly-32-blitz-masters-443681\""
],
"metadata": {
"id": "Z4HCaqGOBh3K"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "e2e-jlO4_8ES"
},
"outputs": [],
"source": [
"# Set the User-Agent header that the chessdotcom client sends with its requests.\n",
"# Replace the placeholder contact address with your own before running.\n",
"Client.request_config[\"headers\"][\"User-Agent\"] = \"My Python Application. Contact me at email@example.com\""
]
},
{
"cell_type": "code",
"source": [
"# The slug after \"live/\" names the output file. Drop any query string, fragment or\n",
"# trailing slash so a pasted URL such as \".../live/<slug>?round=1\" still yields a clean name.\n",
"tourn_name = tourn_url.split(\"live/\")[-1].split(\"?\")[0].split(\"#\")[0].strip(\"/\")"
],
"metadata": {
"id": "VFmtXn0Emwtl"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Maps the username of the first-listed (white) player of a pairing to the set of\n",
"# game ids (strings taken from the game links) recorded for that player.\n",
"player_map = {}"
],
"metadata": {
"id": "WNycf2dH1zLb"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Fetch the tournament landing page once; later cells parse `soup`.\n",
"# The timeout keeps the cell from hanging indefinitely, and raise_for_status() surfaces\n",
"# HTTP errors here instead of as confusing None lookups further down.\n",
"r = requests.get(tourn_url, timeout=30)\n",
"r.raise_for_status()\n",
"soup = BeautifulSoup(r.content, 'html.parser')"
],
"metadata": {
"id": "gLEwVwz_x9-5"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# The third <span> of the stats panel is parsed as a date/time string in the\n",
"# \"%b %d, %Y, %I:%M %p\" format; its year and month select the archives fetched later.\n",
"stats_el = soup.find('div', class_='tournaments-live-view-content-stats')\n",
"if stats_el is None:\n",
"    raise ValueError(\"Tournament stats panel not found; check tourn_url or the page layout\")\n",
"span_elements = stats_el.find_all('span')\n",
"if len(span_elements) < 3:\n",
"    raise ValueError(\"Tournament stats panel does not contain a date/time span\")\n",
"date_and_time = span_elements[2].get_text(strip=True)\n",
"parsed_date = datetime.strptime(date_and_time, \"%b %d, %Y, %I:%M %p\")"
],
"metadata": {
"id": "KONBJTLXnm58"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Text of the tournament title span; used later to build the PGN [Event] header.\n",
"name_el = soup.find('span', class_='v5-title-has-icon tournaments')\n",
"name = name_el.get_text().strip()"
],
"metadata": {
"id": "SvG3YwLcFm-A"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Year and month of the parsed tournament date; they select which monthly archive is downloaded per player.\n",
"year, month = parsed_date.year, parsed_date.month"
],
"metadata": {
"id": "GVzf-pDlnseA"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# The number of rounds is read from the `data-rounds` attribute of the first `v5-section` div.\n",
"# Fall back to 0 (no rounds to scan) when the div or the attribute is missing, instead of\n",
"# crashing on int(None).\n",
"num_rnds_div = soup.find('div', class_ = 'v5-section')\n",
"rounds_attr = num_rnds_div.get('data-rounds') if num_rnds_div is not None else None\n",
"num_rnds = int(rounds_attr) if rounds_attr else 0"
],
"metadata": {
"id": "LCVF4bIByNj6"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def _fetch_soup(url):\n",
"    \"\"\"Download `url` and parse it with html.parser; raises on HTTP errors or timeout.\"\"\"\n",
"    resp = requests.get(url, timeout=30)\n",
"    resp.raise_for_status()\n",
"    return BeautifulSoup(resp.content, 'html.parser')\n",
"\n",
"\n",
"# Walk every pairings page of every round and file each game id under the username of\n",
"# the player listed first in the pairing (treated as white).\n",
"for rnd in range(1, num_rnds + 1):\n",
"    rnd_url = tourn_url + \"?round=\" + str(rnd)\n",
"    first_page = _fetch_soup(rnd_url + \"&pairings=1\")\n",
"    # The pagination widget, when present, carries the number of pairing pages.\n",
"    pairing_dev = first_page.find('div', {'id': 'pairings-pagination-bottom'})\n",
"    data_total_pages_value = 1\n",
"    if pairing_dev is not None:\n",
"        data_total_pages_value = int(pairing_dev.get('data-total-pages', 1))\n",
"    for pairing in range(1, data_total_pages_value + 1):\n",
"        # Page 1 was already downloaded to read the page count; reuse it.\n",
"        if pairing == 1:\n",
"            pairing_soup = first_page\n",
"        else:\n",
"            pairing_soup = _fetch_soup(rnd_url + \"&pairings=\" + str(pairing))\n",
"        table = pairing_soup.find('table', class_ = 'table-component table-hover tournaments-live-view-pairings-table')\n",
"        if table is None:\n",
"            # No pairings table on this page; nothing to record.\n",
"            continue\n",
"        # Skip the first row (presumably the table header).\n",
"        for row in table.find_all('tr')[1:]:\n",
"            a = row.find('a', class_='tournaments-live-view-background-link')\n",
"            players = row.find_all('div', class_='tournaments-live-view-pairings-user')\n",
"            if a is None or not players:\n",
"                # Row without a game link or without players; skip it instead of crashing.\n",
"                continue\n",
"            avatar = players[0].find('a', class_='tournaments-live-view-player-avatar')\n",
"            white = avatar.get('title', '') if avatar is not None else ''\n",
"            # The game id is the last path segment of the game link.\n",
"            g_id = a.get('href', '').rstrip('/').split('/')[-1]\n",
"            if white and g_id:\n",
"                player_map.setdefault(white, set()).add(g_id)"
],
"metadata": {
"id": "hjTAFmaKznPG"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Usernames to query, in the order they were first recorded in player_map.\n",
"player_list = list(player_map)"
],
"metadata": {
"id": "HgRBWvzoMj3e"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Sanity check: number of distinct usernames recorded as the first-listed (white) player.\n",
"print(\"No of players:\",len(player_list))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "L6yIjTCKZJHS",
"outputId": "f78537d7-f060-4c9e-a47e-526c249cdda7"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"No of players: 50\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Total number of recorded game ids: the sizes of all per-player id sets added together.\n",
"player_map_values = player_map.values()\n",
"total_gms = sum(map(len, player_map_values))\n",
"print(\"Total no of game ids:\", total_gms)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "69NebZe_ZtUn",
"outputId": "b3bf3fa1-a15b-4ffd-f54c-c4600fc75ea4"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Total no of game ids: 190\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# PGN text of each tournament game found in the players' monthly archives; filled in below.\n",
"games = []"
],
"metadata": {
"id": "s7twkwJcMwSM"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Captures the numeric id at the end of a chess.com live-game URL inside a game's PGN text.\n",
"pattern = r\"www\\.chess\\.com/game/live/(\\d+)\""
],
"metadata": {
"id": "n7OiCrcsRF8R"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Start from a clean slate so re-running this cell does not append every game again.\n",
"games = []\n",
"collected_ids = set()\n",
"for idx, player in enumerate(player_list):\n",
"    if idx % 50 == 0:\n",
"        print(\"Processing player\", idx)\n",
"    game_ids = player_map.get(player, set())\n",
"    # Download the player's archive for the tournament's year and month.\n",
"    res = get_player_games_by_month_pgn(player, year, month)\n",
"    if not res:\n",
"        continue\n",
"    # The archive text is split into individual games on the three-newline separator.\n",
"    for g in res.json['pgn']['pgn'].split(\"\\n\\n\\n\"):\n",
"        game_match = re.search(pattern, g)\n",
"        if game_match is None:\n",
"            continue\n",
"        matched_id = game_match.group(1)\n",
"        # Keep only games whose id was recorded for this player, each at most once.\n",
"        if matched_id in game_ids and matched_id not in collected_ids:\n",
"            collected_ids.add(matched_id)\n",
"            games.append(g)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "18YO5apyNi3A",
"outputId": "f2566006-f9e3-4b6a-8e04-fa500bc0e782"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Processing player 0\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Number of PGN games collected; compare with the total number of game ids printed earlier.\n",
"print(\"In games string:\",len(games))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4nH44Az9TSSG",
"outputId": "f7443d87-4a87-43a9-aee9-e07ef09f4f30"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"In games string: 190\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Concatenate the collected games into one PGN text, separated by two blank lines.\n",
"game_str = \"\\n\\n\\n\".join(games)"
],
"metadata": {
"id": "8VubtjDHXARk"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Matches an [ECOUrl \"...\"] tag together with the newline that precedes it.\n",
"remove_pattern = r'\\n\\[ECOUrl \"[^\"]+\"\\]'"
],
"metadata": {
"id": "hzyPqVV6kHMk"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# PGN string tokens must escape backslashes and double quotes; without this, a tournament\n",
"# name containing either character would produce an invalid [Event] tag.\n",
"safe_name = name.replace(\"\\\\\", \"\\\\\\\\\").replace('\"', '\\\\\"')\n",
"event_header = f'[Event \"{safe_name}\"]'"
],
"metadata": {
"id": "8sscJuOdFzda"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Drop every ECOUrl tag line from the combined PGN text.\n",
"game_str = re.sub(remove_pattern,'',game_str)"
],
"metadata": {
"id": "5xKmut2WlAFx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Swap the [Event \"Live Chess\"] tag of each game for the tournament-specific header.\n",
"game_str = game_str.replace('[Event \"Live Chess\"]', event_header)"
],
"metadata": {
"id": "AMAm2kVjGrLx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Output PGN file, named after the tournament slug; a relative path, so it is created in the working directory.\n",
"file_path = tourn_name+\".pgn\""
],
"metadata": {
"id": "yevPITbOXN49"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Write as UTF-8 explicitly: names in the PGN may contain non-ASCII characters, and the\n",
"# platform default encoding is not guaranteed to be able to encode them.\n",
"with open(file_path, \"w\", encoding=\"utf-8\") as pgn_file:\n",
"    pgn_file.write(game_str)"
],
"metadata": {
"id": "kQIwDcXNXZAj"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment