Skip to content

Instantly share code, notes, and snippets.

@forrestthewoods
Created April 9, 2024 05:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save forrestthewoods/9b870a19bb5e2e84f95c96fdd41dfa1b to your computer and use it in GitHub Desktop.
Save forrestthewoods/9b870a19bb5e2e84f95c96fdd41dfa1b to your computer and use it in GitHub Desktop.
import os
import json
import matplotlib.pyplot as plt
import numpy as np
import pprint
import requests
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
# Useful URLs (examples of the two FotMob JSON endpoints this script queries)
# https://www.fotmob.com/api/leagues?id=130&season=2023
# https://www.fotmob.com/api/matchDetails?matchId=4386821
# Constants
# When True, try_load_from_cache() returns JSON from an existing cache file
# instead of reporting a miss; cache files are written regardless.
ENABLE_CACHE = True
# When True, extra progress/debug output is printed (cache hits and writes,
# fetched URLs, and the accumulated totals).
DEBUG_SPEW = False
# League id passed as the `id` query parameter of the leagues endpoint.
LEAGUE_ID_MLS = 130
# Season passed as the `season` query parameter; also embedded in plot titles
# and output filenames.
SEASON_ID = 2024
# Runtime pseudo-constants
# Absolute directory containing this script; the cache/, output/ and
# data/icons/ paths used below are built relative to it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
def try_load_from_cache(filepath):
    """Return the JSON document cached at ``filepath``, or ``None`` on a miss.

    A miss means either that caching is switched off (``ENABLE_CACHE`` is
    falsy) or that no file exists at ``filepath``.
    """
    cache_hit = ENABLE_CACHE and os.path.exists(filepath)
    if not cache_hit:
        return None

    with open(filepath, "r") as cache_file:
        cached = json.load(cache_file)

    if DEBUG_SPEW:
        print(f"Loaded from cache: {filepath}")
    return cached
def write_to_cache(json_data, filepath):
    """Serialize ``json_data`` to ``filepath`` as indented JSON.

    Missing parent directories are created first. The document is written to
    a sibling ``<filepath>.tmp`` file and then moved into place with
    ``os.replace``, so an interrupted or failed write never leaves a
    truncated JSON file at ``filepath`` for a later cache read to choke on.

    Args:
        json_data: any object ``json.dump`` can serialize.
        filepath: destination path of the cache file.
    """
    # os.makedirs("") raises, so only create a directory when the path has one
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    temp_path = f"{filepath}.tmp"
    try:
        with open(temp_path, "w") as file:
            json.dump(json_data, file, indent=4)
        os.replace(temp_path, filepath)
    finally:
        # after a successful replace the temp file is gone; this only cleans
        # up the partial file left behind by a failed write
        if os.path.exists(temp_path):
            os.remove(temp_path)

    if DEBUG_SPEW:
        print(f"Wrote to cache: {filepath}")
def fetch_url_json(url: str, timeout: float = 30.0):
    """GET ``url`` and return the response body decoded as JSON.

    Args:
        url: address to fetch.
        timeout: seconds passed to ``requests.get`` so a stalled server
            cannot hang the script indefinitely.

    Returns:
        Whatever ``response.json()`` yields for the response body.

    Raises:
        Exception: if the HTTP status code is anything other than 200.
        Errors raised by ``requests`` itself (including timeouts) propagate
        unchanged.
    """
    if DEBUG_SPEW:
        print(f"Fetching: {url}")

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # this must be an f-string, otherwise the placeholders are printed verbatim
        raise Exception(f"Failed to fetch: [{url}]\nResponse: [{response}]")
    return response.json()
def fetch_match_details(match_id):
    """Fetch and return the FotMob ``matchDetails`` JSON for ``match_id``."""
    return fetch_url_json(
        f"https://www.fotmob.com/api/matchDetails?matchId={match_id}"
    )
def get_season_info(league_id, season):
    """Return the FotMob league JSON for ``league_id`` / ``season``.

    The on-disk cache under ``<SCRIPT_DIR>/cache`` is consulted first. On a
    miss (or a falsy cached value) the data is fetched from the leagues
    endpoint, written to the cache, and returned.
    """
    cache_path = os.path.join(
        SCRIPT_DIR, "cache", f"season_{league_id}_{season}.json"
    )

    # cached copy wins when present
    cached = try_load_from_cache(cache_path)
    if cached:
        return cached

    # otherwise go to the network and remember the answer
    fetched = fetch_url_json(
        f"https://www.fotmob.com/api/leagues?id={league_id}&season={season}"
    )
    write_to_cache(fetched, cache_path)
    return fetched
def get_matches(season):
    """Return the match-details JSON for every finished match in ``season``.

    Args:
        season: league JSON as returned by ``get_season_info``; this reads
            ``season["matches"]["allMatches"]`` and, per match, ``"id"`` and
            ``["status"]["finished"]``.

    Returns:
        A list of match-details documents, in the order the matches appear
        in ``allMatches``. Each one comes from the on-disk cache when
        available, otherwise it is fetched and then cached.
    """
    result = []

    for match in season["matches"]["allMatches"]:
        match_id = match["id"]

        # ignore games that haven't finished
        if not match["status"]["finished"]:
            continue

        # compute match filepath
        filepath = os.path.join(SCRIPT_DIR, "cache", f"match_{match_id}.json")

        # check cache, falling back to the shared fetch helper so the
        # endpoint URL is defined in exactly one place
        match_data = try_load_from_cache(filepath)
        if not match_data:
            match_data = fetch_match_details(match_id)
            write_to_cache(match_data, filepath)

        result.append(match_data)

    return result
def accumulate_xg(matches):
    """Sum goals, xG and xGoT (for and against) per team over ``matches``.

    Args:
        matches: iterable of match-details dicts. For each match this reads
            ``general`` (``finished``, ``homeTeam``/``awayTeam`` names),
            ``content.stats.Periods.All.stats[2]`` (the stat group searched
            for the ``expected_goals`` and ``expected_goals_on_target``
            entries) and ``header.teams`` (names and scores, home first).

    Returns:
        Dict mapping team name to a dict with the keys ``goals_for``, ``xg``,
        ``xgot``, ``goals_against``, ``xg_against`` and ``xgot_against``.
        Matches that are unfinished, or that lack either xG or xGoT values,
        contribute nothing.

    Raises:
        AssertionError: if the team names in ``header`` disagree with the
            ones in ``general``.
    """
    result = {}

    for match in matches:
        general = match["general"]

        # ignore games that didn't finish
        if not general["finished"]:
            continue

        # determine teams
        home_team_name = general["homeTeam"]["name"]
        away_team_name = general["awayTeam"]["name"]

        # find xG and xGoT data
        # NOTE(review): the stat group index 2 is hard-coded; presumably that
        # is the expected-goals group in the FotMob payload - confirm.
        xg_data = match["content"]["stats"]["Periods"]["All"]["stats"][2]
        xg = None
        xgot = None
        for entry in xg_data["stats"]:
            if entry["key"] == "expected_goals" and entry["stats"][0] is not None:
                xg = entry["stats"]
            elif entry["key"] == "expected_goals_on_target" and entry["stats"][0] is not None:
                xgot = entry["stats"]

        # ignore this game if we couldn't find xg and xgot
        if xg is None or xgot is None:
            continue

        # header lists the home side first; sanity-check that it agrees
        home_header, away_header = match["header"]["teams"][0], match["header"]["teams"][1]
        assert home_header["name"] == home_team_name
        assert away_header["name"] == away_team_name

        home_goals, home_xg, home_xgot = home_header["score"], float(xg[0]), float(xgot[0])
        away_goals, away_xg, away_xgot = away_header["score"], float(xg[1]), float(xgot[1])

        # One row per side: (team, own goals/xg/xgot, opponent goals/xg/xgot).
        # Sharing a single accumulation path keeps the "against" totals from
        # being derived from the "for" totals by a copy-paste slip.
        sides = (
            (home_team_name, home_goals, home_xg, home_xgot, away_goals, away_xg, away_xgot),
            (away_team_name, away_goals, away_xg, away_xgot, home_goals, home_xg, home_xgot),
        )
        for team_name, goals, own_xg, own_xgot, opp_goals, opp_xg, opp_xgot in sides:
            totals = result.setdefault(
                team_name,
                {
                    "goals_for": 0,
                    "xg": 0.0,
                    "xgot": 0.0,
                    "goals_against": 0,
                    "xg_against": 0.0,
                    "xgot_against": 0.0,
                },
            )
            totals["goals_for"] += goals
            totals["xg"] += own_xg
            totals["xgot"] += own_xgot
            totals["goals_against"] += opp_goals
            totals["xg_against"] += opp_xg
            totals["xgot_against"] += opp_xgot

    return result
def plot_one(x, y, icon_paths, title, x_label, y_label, ul_label, lr_label, filename):
    """Draw one icon scatter plot and save it under ``<SCRIPT_DIR>/output``.

    Args:
        x, y: coordinate sequences, indexed in parallel.
        icon_paths: image file per point, indexed in parallel with ``x``.
        title, x_label, y_label: figure title and axis labels.
        ul_label, lr_label: text placed near the upper-left and lower-right
            corners of the axes (the two sides of the x == y diagonal).
        filename: name of the image file written into the output directory.

    The figure is left open after saving.
    """
    fig, ax = plt.subplots(figsize=(10, 5.625))

    # one image marker per point; zoom is derived from each image's own
    # width so differently sized source icons are scaled to match
    icon_width = 20
    for idx, x_value in enumerate(x):
        icon = plt.imread(icon_paths[idx])
        scale = icon_width / icon.shape[1]
        marker = AnnotationBbox(
            OffsetImage(icon, zoom=scale, interpolation="antialiased"),
            (x_value, y[idx]),
            xycoords='data',
            frameon=False,
        )
        ax.add_artist(marker)

    # title and axis labels
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    # pad the data range by 5 on each side, but never go below 0
    ax.set_xlim(max(0, min(x) - 5), max(x) + 5)
    ax.set_ylim(max(0, min(y) - 5), max(y) + 5)

    # dashed x == y reference line spanning both axis ranges
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    diagonal = [np.min([x_min, y_min]), np.max([x_max, y_max])]
    ax.plot(diagonal, diagonal, 'k--', alpha=0.75, zorder=0)

    # corner captions, inset 5% horizontally and 10% vertically
    x_span = x_max - x_min
    y_span = y_max - y_min
    ax.text(x_min + 0.05 * x_span, y_max - 0.1 * y_span, ul_label, fontsize=12, ha='left', va='top')
    ax.text(x_max - 0.05 * x_span, y_min + 0.1 * y_span, lr_label, fontsize=12, ha='right', va='bottom')

    # write the image, creating the output directory on first use
    out_path = os.path.join(SCRIPT_DIR, "output", filename)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
if __name__ == "__main__":
    # Pipeline: season listing -> per-match details -> per-team totals -> plots.
    print("Loading season...")
    season = get_season_info(LEAGUE_ID_MLS, SEASON_ID)

    print("Loading matches...")
    matches = get_matches(season)

    print("Accumulating data...")
    xg = accumulate_xg(matches)
    if DEBUG_SPEW:
        pprint.pprint(xg)

    # one icon per team, in the same order as the per-team totals
    icon_paths = [f"{SCRIPT_DIR}/data/icons/{team}.png" for team in xg]

    # (x key, y key, title, x label, y label, upper-left label,
    #  lower-right label, output filename) for each chart, in output order
    plot_specs = [
        ("xg", "xgot",
         f"MLS {SEASON_ID} - xG vs xGoT",
         "Expected Goals", "Expected Goals on Target",
         "Great Finishing", "Poor Finishing",
         f"01_mls_{SEASON_ID}_xg_vs_xgot.png"),
        ("xg", "goals_for",
         f"MLS {SEASON_ID} - xG vs Actual Goals",
         "Expected Goals", "Actual Goals",
         "Overperform", "Underperform",
         f"02_mls_{SEASON_ID}_xg_vs_goals.png"),
        ("xgot", "goals_for",
         f"MLS {SEASON_ID} - xGoT vs Actual Goals",
         "Expected Goals on Target", "Actual Goals",
         "Lucky", "Unlucky",
         f"03_mls_{SEASON_ID}_xgot_vs_goals.png"),
        ("xg_against", "xgot_against",
         f"MLS {SEASON_ID} - xG Against vs xGoT Against",
         "Expected Goals Against", "Expected Goals on Target Against",
         "Great Finishing (by opponent)", "Poor Finishing (by opponent)",
         f"04_mls_{SEASON_ID}_xg_against_vs_xgot_against.png"),
        ("xg_against", "goals_against",
         f"MLS {SEASON_ID} - xG Against vs Goals Against",
         "Expected Goals", "Actual Goals",
         "Opponent Overperform", "Opponent Underperform",
         f"05_mls_{SEASON_ID}_xg_against_vs_goals_against.png"),
        ("xgot_against", "goals_against",
         f"MLS {SEASON_ID} - xGoT Against vs Goals Against",
         "Expected Goals on Target", "Actual Goals",
         "Bad Keeper", "Good Keeper",
         f"06_mls_{SEASON_ID}_xgot_against_vs_goals_against.png"),
    ]

    for x_key, y_key, title, x_label, y_label, ul_label, lr_label, filename in plot_specs:
        x = [entry[x_key] for entry in xg.values()]
        y = [entry[y_key] for entry in xg.values()]
        plot_one(x, y, icon_paths, title, x_label, y_label, ul_label, lr_label, filename)

    # display every figure created above
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment