Skip to content

Instantly share code, notes, and snippets.

@tonyelhabr
Created August 27, 2023 22:20
Show Gist options
  • Save tonyelhabr/335b54fb1d937c0f8a817defe2c5ab6b to your computer and use it in GitHub Desktop.
Save tonyelhabr/335b54fb1d937c0f8a817defe2c5ab6b to your computer and use it in GitHub Desktop.
Pull xG data from StatsBomb and Opta (via FBRef) for the 2023 FIFA Women's World Cup

Raw data pull

library(StatsBombR)
library(worldfootballR)
library(dplyr)
library(janitor)
library(tibble)

## StatsBomb: find the 2023 Women's World Cup among the free competitions,
## list its matches, then pull every event for those matches.
sb_free_comps <- FreeCompetitions()
sb_comp <- sb_free_comps |>
  filter(competition_name == "Women's World Cup" & season_name == '2023')
sb_matches <- sb_comp |> FreeMatches()

## Keep only events that carry a StatsBomb xG value (i.e. shots). The
## intermediate event table is scoped to `local()` so it does not linger in
## the global environment.
sb_shots <- local({
  events <- free_allevents(MatchesDF = sb_matches)
  events <- clean_names(as_tibble(events))
  filter(events, !is.na(shot_statsbomb_xg))
})

## FBref: collect the match URLs for the 2023 women's tournament listed under
## the given non-domestic league history page.
## NOTE: this variable shares its name with the worldfootballR function it is
## built from. Calls such as `fb_match_urls(...)` still resolve to the function
## because R skips non-function objects when looking up a call.
fb_match_urls <- fb_match_urls(
  non_dom_league_url = 'https://fbref.com/en/comps/106/history/Womens-World-Cup-Seasons',
  season_end_year = 2023,
  gender = 'F',
  country = '',
  tier = ''
)

## Pull the shooting table for every match URL and normalise column names.
opta_shots <- clean_names(as_tibble(fb_match_shooting(fb_match_urls)))

Extract non-penalty shots and bring the data sets together.

## StatsBomb non-penalty shots, reduced to two columns:
##   xg - the StatsBomb xG of the shot
##   g  - TRUE when the shot outcome is 'Goal'
sb_np_shots <- transmute(
  filter(sb_shots, shot_type_name != 'Penalty'),
  xg = shot_statsbomb_xg,
  g = shot_outcome_name == 'Goal'
)

## Opta (via FBref) non-penalty shots, reduced to two columns:
##   xg - the shot's xG, coerced to numeric
##   g  - TRUE when the shot outcome is 'Goal'
opta_np_shots <- opta_shots |> 
  ## `x_g` is coerced up front so the numeric comparison below is meaningful
  ## (presumably it arrives as character; confirm against fb_match_shooting()).
  mutate(x_g = as.numeric(x_g)) |> 
  ## Heuristic for penalties: no explicit penalty flag is used here, so a shot
  ## is treated as a penalty when its distance is recorded as '13' and its xG
  ## is 0.79. `near()` is used instead of `==` so the xG match does not depend
  ## on exact floating-point equality. `distance` is compared as a string.
  ## Rows for which the condition evaluates to NA are dropped by `filter()`.
  filter(
    !(distance == '13' & near(x_g, 0.79))
  ) |> 
  transmute(
    xg = x_g,
    g = outcome == 'Goal'
  )
  
## Stack the two providers' non-penalty shots into one table, tagging each row
## with the provider it came from in a trailing `source` column.
np_shots <- bind_rows(
  mutate(sb_np_shots, source = 'StatsBomb'),
  mutate(opta_np_shots, source = 'Opta')
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment