Last active
May 25, 2022 15:27
-
-
Save zzstoatzz/5d012f141a3d15e5f080d80c363af829 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from chessdotcom import get_player_game_archives | |
from io import StringIO | |
from typing import List, Tuple | |
import boto3, chess.pgn as pgn, pandas as pd, requests | |
# https://python-chess.readthedocs.io/en/latest/pgn.html
class Game(pgn.Game):
    """A single game parsed from PGN text, flattened into a one-row DataFrame.

    Attributes:
        game_obj: Shallow copy of the parsed game's attribute dict, with the
            ``'variations'`` entry replaced by the string form of the first
            variation node (or ``''`` when the game has no moves).
        df: One-row ``pandas.DataFrame`` holding one column per PGN header,
            plus a ``'pgn'`` column containing the ``'variations'`` string.
    """

    def __init__(self, pgn_str: str):
        """Parse ``pgn_str`` and build ``game_obj`` and ``df``.

        Args:
            pgn_str: PGN text of one game.

        Raises:
            ValueError: If no game could be read from ``pgn_str``.
        """
        # Initialise the base class so inherited attributes exist on self.
        super().__init__()

        parsed = pgn.read_game(StringIO(pgn_str))
        if parsed is None:
            # read_game signals "nothing to read" with None; fail loudly
            # here instead of with an AttributeError on `.__dict__`.
            raise ValueError('no game could be parsed from the given PGN text')

        # Copy so the parsed game's own attributes are not overwritten below.
        self.game_obj = dict(vars(parsed))

        # A game without any move has no variations; store an empty string
        # rather than raising IndexError.
        variations = self.game_obj['variations']
        self.game_obj['variations'] = str(variations[0]) if variations else ''

        self.df = pd.json_normalize(dict(self.game_obj['headers']))
        self.df['pgn'] = self.game_obj['variations']
def get_games(url: str, timeout: float = 30.0) -> List[Game]:
    """Download one archive URL and parse every game it lists.

    Args:
        url: URL whose JSON response contains a ``'games'`` list, each entry
            carrying its PGN text under the ``'pgn'`` key.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely.

    Returns:
        One ``Game`` per entry of the response's ``'games'`` list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    print(f"GET {url}")
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors directly instead of as a confusing JSON/KeyError
    # failure further down.
    response.raise_for_status()
    return [Game(game['pgn']) for game in response.json()['games']]
def load_games(games: List["Game"], base_path: str) -> None:
    """Concatenate the games' DataFrames and write them as one parquet file.

    The target is ``{base_path}/games/{year}_{month}.parquet.gzip``, where
    year and month are taken from the ``'Date'`` value of the first row
    (split on ``'.'`` into exactly three parts).

    Args:
        games: Objects exposing a ``df`` DataFrame attribute.
        base_path: Prefix under which the ``games/`` file is written.

    Returns:
        None. When ``games`` is empty nothing is written.
    """
    frames = [game.df for game in games]
    if not frames:
        # pd.concat raises ValueError on an empty list, and there would be no
        # 'Date' to derive the file name from anyway.
        print('no games to store, skipping write')
        return

    df = pd.concat(frames)
    # Only the first row's date is consulted to name the file.
    year, month, _ = df['Date'].iloc[0].split('.')
    print(f'storing games from month {month} of year {year}...')
    filepath = f'{base_path}/games/{year}_{month}.parquet.gzip'
    df.to_parquet(filepath, compression='gzip')
def orca(filepath: str, usernames: Tuple[str, ...] = ('n80n8',)) -> None:
    """Fetch every archived month of games for each user and store it.

    Args:
        filepath: Base path handed to ``load_games`` as ``base_path``.
        usernames: Users whose archives are processed. Defaults to the single
            account that was previously hard-coded.
    """
    for username in usernames:
        print(f"Checking for games on chess.com from: {username}")

        # list of URLs to GET months of games from
        archive_urls = get_player_game_archives(username=username).archives
        if not archive_urls:
            print(f'No new months of games to load for {username}!')
            continue

        print(f'Fetching {len(archive_urls)} new months of games from {username}..')

        # Handle one month at a time: only a single month is held in memory,
        # and months already stored survive a failure on a later download.
        for url in archive_urls:
            month = get_games(url)
            if not month:
                # Nothing to concatenate or name a file after.
                continue
            load_games(
                games=month,
                base_path=filepath
            )
if __name__ == "__main__":
    # Script entry point: run the pipeline against the S3 bucket below.
    orca(filepath='s3://nate-all-purpose-bucket')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment