Skip to content

Instantly share code, notes, and snippets.

@kirillbobyrev
Last active November 30, 2023 01:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kirillbobyrev/c16ef00e01370e9789d1d6b91fe63442 to your computer and use it in GitHub Desktop.
Save kirillbobyrev/c16ef00e01370e9789d1d6b91fe63442 to your computer and use it in GitHub Desktop.
Scripts for "Analyzing long win streaks in online chess"
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from collections import Counter
# Chess.com username of the player whose games are analyzed.
PLAYER = "Hikaru"
# Date window in YYYY-MM-DD format; games are later filtered by their end time
# against these two dates.
START_DATE = "2023-01-01"
END_DATE = "2023-11-28"
# Minimum numbers of consecutive wins reported as long / medium / short streaks.
LONG_STREAK = 20
MEDIUM_STREAK = 15
SHORT_STREAK = 10
# HTTP headers sent with every request to the Chess.com API.
headers = {"user-agent": "kirillbobyrev/1.0.0"}
def get_player_games(player: str, year: str, month: str):
    """Download one monthly game archive of `player` from the Chess.com API.

    Args:
        player: Chess.com username. It is used both in the archive URL and to
            tell the player's rating apart from the opponent's in each game,
            so it must match the username spelling returned by the API.
        year: Year of the archive as it appears in the URL, e.g. "2023".
        month: Month of the archive as it appears in the URL, e.g. "09".

    Returns:
        A DataFrame with one row per game and the columns end_time, url,
        result (1 = white won, -1 = black won, 0 otherwise), white, black,
        white_rating, black_rating, time_control, rated, rules,
        "<player>_opponent_rating" and "<player>_rating". Column dtypes are
        the ones chosen by `DataFrame.convert_dtypes`.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    r = requests.get(
        f"https://api.chess.com/pub/player/{player}/games/{year}/{month}",
        headers=headers,
        # Without a timeout a stalled connection would hang the script forever.
        timeout=30,
    )
    # Fail loudly on an error response instead of hitting a confusing
    # KeyError on the missing "games" key below.
    r.raise_for_status()

    rows = []
    for game in r.json()["games"]:
        white, black = game["white"], game["black"]

        result = 0
        if white["result"] == "win":
            result = 1
        if black["result"] == "win":
            result = -1

        # Decide the sides from the `player` argument rather than from a
        # module-level constant, so the function is correct for any player.
        if white["username"] == player:
            player_rating, opponent_rating = white["rating"], black["rating"]
        else:
            player_rating, opponent_rating = black["rating"], white["rating"]

        rows.append(
            (
                game["end_time"],
                game["url"],
                result,
                white["username"],
                black["username"],
                white["rating"],
                black["rating"],
                game["time_control"],
                game["rated"],
                game["rules"],
                opponent_rating,
                player_rating,
            )
        )

    df = pd.DataFrame(
        rows,
        columns=[
            "end_time",
            "url",
            "result",
            "white",
            "black",
            "white_rating",
            "black_rating",
            "time_control",
            "rated",
            "rules",
            f"{player}_opponent_rating",
            f"{player}_rating",
        ],
    )
    return df.convert_dtypes()
# Get player's games archives: one archive per calendar month touched by the
# START_DATE..END_DATE window, newest month first. Deriving the months from the
# two dates keeps the downloads in sync with the date filter applied afterwards.
_first_day = datetime.strptime(START_DATE, "%Y-%m-%d")
_last_day = datetime.strptime(END_DATE, "%Y-%m-%d")
_archives = []
_year, _month = _last_day.year, _last_day.month
while (_year, _month) >= (_first_day.year, _first_day.month):
    _archives.append(get_player_games(PLAYER, f"{_year}", f"{_month:02d}"))
    # Step back one month, rolling over from January to the previous December.
    _year, _month = (_year, _month - 1) if _month > 1 else (_year - 1, 12)
df = pd.concat(_archives)
# Keep only rated 3+0 blitz games of standard chess that ended strictly between
# START_DATE and END_DATE, then order them chronologically by end time.
print(f"Finding games from {START_DATE} to {END_DATE}")
# NOTE: these are naive datetimes, so .timestamp() interprets them in the local
# timezone of the machine running the script.
window_start = datetime.strptime(START_DATE, "%Y-%m-%d").timestamp()
window_end = datetime.strptime(END_DATE, "%Y-%m-%d").timestamp()
is_three_minute = df["time_control"] == "180"
is_standard_chess = df["rules"] == "chess"
ended_in_window = (df["end_time"] > window_start) & (df["end_time"] < window_end)
df = df[is_three_minute & is_standard_chess & ended_in_window & df["rated"]]
df = df.set_index("end_time").sort_index()
# Flag the games the player won: a white win while playing white, or a black
# win while playing black.
won_as_white = (df["white"] == PLAYER) & (df["result"] == 1)
won_as_black = (df["black"] == PLAYER) & (df["result"] == -1)
df[f"{PLAYER}_won"] = np.where(won_as_white | won_as_black, True, False)
# Print some statistics.
total_games = len(df)
print(f"{PLAYER} has played {total_games} filtered games in selected time period.")
won = df[f"{PLAYER}_won"].sum()
print(f"{PLAYER} won {won} out of these games.")
# Count draws (result == 0) with a boolean mask: value_counts()[0] raises
# KeyError when the filtered games contain no draw at all.
drew = (df["result"] == 0).sum()
print(f"{PLAYER} drew {drew} out of these games.")
# A win is worth one point and a draw half a point.
print(
    f"{PLAYER} scored a total of {won + drew / 2} points out of {total_games} "
    "possible."
)
# Report the mean rating of the opponents and of the player.
opponent_ratings = df[f"{PLAYER}_opponent_rating"]
mean_opponent_rating = opponent_ratings.mean()
print(f"The average opponent rating is {mean_opponent_rating:.2f}")
mean_player_rating = df[f"{PLAYER}_rating"].mean()
print(f"{PLAYER} average rating is {mean_player_rating:.2f}")
# Opponent rating at the 25th, 50th, 75th and 90th percentile.
percentile_25, percentile_50, percentile_75, percentile_90 = (
    opponent_ratings.quantile(q) for q in (0.25, 0.50, 0.75, 0.90)
)
print(
    f"25%: {percentile_25:.2f}, 50%: {percentile_50:.2f}, "
    f"75%: {percentile_75:.2f}, 90%: {percentile_90:.2f}"
)
# Plot opponent rating distribution.
plot = df[f"{PLAYER}_opponent_rating"].plot.hist(bins=75, label="density", density=True)
plt.title(f"Opponent rating distribution for {PLAYER}")
plot.set_xlabel("Chess.com rating")
# density=True normalizes the histogram, so the y axis shows a probability
# density rather than a number of games.
plot.set_ylabel("Density")
# Mark the mean (black) and the opponent-rating percentiles (red) with dashed
# vertical lines; each legend entry shows the value truncated to an integer.
for marker_value, marker_color, marker_name in (
    (mean_opponent_rating, "k", "mean"),
    (percentile_25, "r", "25%"),
    (percentile_50, "r", "50%"),
    (percentile_75, "r", "75%"),
    (percentile_90, "r", "90%"),
):
    plot.axvline(
        marker_value,
        color=marker_color,
        linestyle="dashed",
        linewidth=2,
        label=f"{marker_name} = {int(marker_value)}",
    )
plot.legend()
plt.xlim(2200, 3300)
fig = plot.get_figure()
fig.savefig(f"{PLAYER}_opponent_ratings.png")
# Collect every maximal run of consecutive wins as a tuple of
# (length, mean opponent rating, player's rating in the first game of the run,
# URL of the first game of the run).
streaks = []
# Wins of the run in progress, as (opponent_rating, player_rating, url).
current_run = []
for won_game, opponent_rating, player_rating, url in zip(
    df[f"{PLAYER}_won"],
    df[f"{PLAYER}_opponent_rating"],
    df[f"{PLAYER}_rating"],
    df["url"],
):
    if won_game:
        current_run.append((opponent_rating, player_rating, url))
        continue
    # Any non-win closes the run in progress, if there is one.
    if current_run:
        streaks.append(
            (
                len(current_run),
                np.mean([win[0] for win in current_run]),
                current_run[0][1],
                current_run[0][2],
            )
        )
    current_run = []
# The games may end in the middle of a run that was never closed.
if current_run:
    streaks.append(
        (
            len(current_run),
            np.mean([win[0] for win in current_run]),
            current_run[0][1],
            current_run[0][2],
        )
    )
TOP_K = 10
print(f"{PLAYER}'s top {TOP_K} winning streaks:")
# Tuples compare element by element, so a descending sort puts the longest
# streaks first (the remaining fields only break ties).
ranked_streaks = sorted(streaks, reverse=True)
for length, mean_rating, start_rating, first_game in ranked_streaks[:TOP_K]:
    summary = (
        f"{length} games, average opponent rating "
        f"{mean_rating:.2f}, starting {PLAYER} rating "
        f"{start_rating:.2f}, starting at {first_game}"
    )
    print(summary)
# Lengths of all win streaks, in the order the streaks occurred.
streak_lengths = [item[0] for item in streaks]
print("Streaks:")
print(sorted(streak_lengths, reverse=True)[:30])
# Count the streaks reaching each threshold; a streak long enough for a higher
# threshold is also counted for the lower ones.
short_streaks = sum(length >= SHORT_STREAK for length in streak_lengths)
print(f"Short streaks of {SHORT_STREAK}+ wins: {short_streaks}")
medium_streaks = sum(length >= MEDIUM_STREAK for length in streak_lengths)
print(f"Medium streaks of {MEDIUM_STREAK}+ wins: {medium_streaks}")
long_streaks = sum(length >= LONG_STREAK for length in streak_lengths)
# This line reports the LONG_STREAK threshold, so it is labelled "Long".
print(f"Long streaks of {LONG_STREAK}+ wins: {long_streaks}")
# Histogram of the win streaks that are at least five games long.
trimmed_lengths = [length for length in streak_lengths if length >= 5]
plt.clf()  # start from an empty figure before drawing this histogram
plt.hist(trimmed_lengths, bins=20)
plt.xlim(5, 60)
plt.savefig("win_streaks_distribution.png")
#include <cstdlib>
#include <iostream>
#include <random>
using namespace std;
// Number of simulated game sequences between two progress reports.
constexpr int kNumSimulationRunsPerBatch = 100'000;
// Number of games the player played this year.
constexpr int kNumTrials = 908;
// Minimum number of consecutive wins to count as a streak.
constexpr int kStreakMinLength = 32;
// Minimum number of streaks to count as a success.
constexpr int kMinStreaks = 1;
// Probability of winning a game calculated using Elo win probability formula.
constexpr double kWinProbability = .83;
// Random engine shared by the whole simulation. It is default-constructed
// here, i.e. it starts from the engine's default seed.
default_random_engine generator;
// Uniform samples from [0, 1) that are compared against kWinProbability.
uniform_real_distribution<double> distribution(0.0, 1.0);
// Simulates a single game: draws one uniform sample and reports a win when it
// falls below kWinProbability.
bool sample_win() {
  const double draw = distribution(generator);
  return draw < kWinProbability;
}
// Simulates kNumTrials games and returns whether the sequence contains
// kMinStreaks or more win streaks of at least kStreakMinLength games.
bool sample_sequence() {
  int qualifying_streaks = 0;
  int current_run = 0;
  for (int game = 0; game < kNumTrials; ++game) {
    if (sample_win()) {
      ++current_run;
    } else {
      // A non-win ends the current run; count it if it was long enough.
      qualifying_streaks += (current_run >= kStreakMinLength) ? 1 : 0;
      current_run = 0;
    }
  }
  // The sequence may end in the middle of a run that was never closed.
  if (current_run >= kStreakMinLength) {
    ++qualifying_streaks;
  }
  return qualifying_streaks >= kMinStreaks;
}
int main() {
srand(time(NULL));
cout << "Starting Monte Carlo simulation." << endl;
cout << "Number of simulations per batch: " << kNumSimulationRunsPerBatch
<< endl;
cout << "Number of trials: " << kNumTrials << endl;
cout << "Minimum streak length: " << kStreakMinLength << endl;
cout << "Minimum number of streaks: " << kMinStreaks << endl;
cout << "Win probability: " << kWinProbability << endl;
long long int sequences = 0;
long long int successes = 0;
while (true) {
for (int i = 0; i < kNumSimulationRunsPerBatch; ++i) {
bool had_enough_streaks = sample_sequence();
++sequences;
if (had_enough_streaks) {
++successes;
}
}
cout << "Simulations: " << sequences << " with " << kMinStreaks
<< " or more " << kStreakMinLength
<< "-game win streaks: " << successes << " probability: "
<< static_cast<double>(successes) / static_cast<double>(sequences)
<< endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment