Skip to content

Instantly share code, notes, and snippets.

@tonyelhabr
Last active July 23, 2023 15:32
Show Gist options
  • Save tonyelhabr/1512caff5c1faf5197d0289f90814346 to your computer and use it in GitHub Desktop.
Save tonyelhabr/1512caff5c1faf5197d0289f90814346 to your computer and use it in GitHub Desktop.
Get stats by player for a 2022 World Cup match.

This example scrapes one specific stat (`linebreaks_attempted_completed`) for a single match (Qatar vs. Ecuador, results ID 128084). Inspiration here. You can get other player stats in a similar fashion.

library(httr)
library(tibble)
library(tidyr)
library(dplyr)
library(purrr)
library(janitor)

## Request per-player stats for a single match (the `128084` in the URL
## identifies the match).
stats_resp <- GET('https://fdh-api.fifa.com/v1/stats/match/128084/players.json')
## Fail fast on an HTTP error (4xx/5xx) rather than reshaping an error payload,
## which would otherwise surface later as an unrelated tidyr/dplyr error.
stop_for_status(stats_resp)

## Reshape the parsed response into one row per player, one column per stat.
## Assumes the payload is a named list keyed by player ID whose entries each
## start with (stat name, stat value) -- TODO confirm against the API.
stats <- stats_resp |>
  content() |>
  ## one row per top-level element; the element names become `player_id`
  enframe('player_id', 'values') |>
  ## one row per (player, stat entry)
  unnest_longer(values) |>
  ## pull the first two components of each entry out into their own columns
  hoist(
    values,
    'stat' = 1,
    'value' = 2
  ) |>
  ## discard whatever is left of each entry after hoisting
  select(-values) |>
  mutate(player_id = as.integer(player_id)) |>
  ## spread the stats out: one column per distinct `stat`
  pivot_wider(
    names_from = stat,
    values_from = value
  ) |>
  ## normalise the stat names into syntactic column names
  clean_names()

## Fetch the two lookup files used to label the stats: squads and players.
squad_resp <- GET('https://play.fifa.com/json/fantasy/squads_fifa.json')
player_resp <- GET('https://play.fifa.com/json/fantasy/players.json')

## Stop right away on an HTTP error (4xx/5xx) instead of parsing an error body
## and failing later with a confusing type error in `map_int()` / `map_chr()`.
stop_for_status(squad_resp)
stop_for_status(player_resp)

## Parsed response bodies (nested lists).
squad_cont <- content(squad_resp)
player_cont <- content(player_resp)

## Squad lookup table: one row per entry of `squad_cont`, keeping its `id`
## (as `squad_id`) and `name` (as `country`) fields.
squads <- tibble(
  squad_id = map_int(squad_cont, \(squad) pluck(squad, 'id')),
  country = map_chr(squad_cont, \(squad) pluck(squad, 'name'))
)

## Player lookup table: one row per entry of `player_cont`, with the player's
## own ID, the ID of the squad they belong to, their name and their position.
players <- tibble(
  player_id = map_int(player_cont, \(plyr) pluck(plyr, 'id')),
  squad_id = map_int(player_cont, \(plyr) pluck(plyr, 'squadId')),
  player = map_chr(player_cont, \(plyr) pluck(plyr, 'name')),
  position = map_chr(player_cont, \(plyr) pluck(plyr, 'position'))
)

## Top 10 players of the match by `linebreaks_attempted_completed`, labelled
## with player name and country. The inner joins drop any stats row whose
## player (or that player's squad) is missing from the lookup tables.
stats |>
  inner_join(players, by = 'player_id') |>
  inner_join(squads, by = 'squad_id') |>
  select(player, country, linebreaks_attempted_completed) |>
  ## `slice_max()` already returns its rows ordered from largest to smallest,
  ## so a separate `arrange(desc(...))` beforehand is redundant. Ties at the
  ## cut-off are kept (default `with_ties = TRUE`), so more than 10 rows can
  ## come back.
  slice_max(linebreaks_attempted_completed, n = 10)
#> # A tibble: 10 × 3
#>    player            country linebreaks_attempted_completed
#>    <chr>             <chr>                            <dbl>
#>  1 Bassam Hisham     Qatar                               23
#>  2 Abdelkarim Hassan Qatar                               21
#>  3 Angelo Preciado   Ecuador                             16
#>  4 Pervis Estupinan  Ecuador                             14
#>  5 Jhegson Mendez    Ecuador                             14
#>  6 Moises Caicedo    Ecuador                             14
#>  7 Karim Boudiaf     Qatar                               13
#>  8 Felix Torres      Ecuador                              9
#>  9 Piero Hincapie    Ecuador                              9
#> 10 Akram Afif        Qatar                                8

You can functionalize the above code by `results_id` (the ID used in the stats URL) so that you can retrieve the same data for other completed matches.

## Fetch the match calendar; the query asks for `count=500` entries of season
## `255711`.
matches_resp <- GET('https://api.fifa.com/api/v3/calendar/matches?language=en&count=500&idSeason=255711')
## Fail fast on an HTTP error (4xx/5xx); otherwise `results` would silently be
## built from an error payload.
stop_for_status(matches_resp)
## The individual match records sit under the top-level `Results` element.
results <- matches_resp |>
  content() |>
  pluck('Results')

## `pluck()` with a fallback of `NA_character_` for absent elements.
## for incomplete matches (anything beyond the group stage at the time of
##   writing) some fields are `NULL`. a plain `pluck()` would hand that `NULL`
##   on to `map_chr()`, which errors because it needs a length-one value for
##   every element; a `.default` of `NA` avoids that.
pluck2 <- \(...) pluck(..., .default = NA_character_)

## Pull the element addressed by `...` (a `pluck()`-style path) out of every
## entry of `x` and return the results as a character vector. Extraction goes
## through `pluck2()`, so absent elements come back as `NA`.
map_pluck_chr <- function(x, ...) {
  map_chr(x, \(entry) pluck2(entry, ...))
}

## Shortcut for `map_pluck_chr()` applied to the global `results` list;
## `...` is the path of the field to extract from each match record.
map_pluck_results_chr <- function(...) {
  map_pluck_chr(results, ...)
}

## One row per match record in `results`: both match identifiers plus the
## home and away country codes.
matches <- tibble(
  ## seems to be Fifa's "true" match ID, but it won't join with the match stats
  match_id = as.integer(map_pluck_results_chr('IdMatch')),
  ## this is the key to use for joining with the match stats
  results_id = as.integer(map_pluck_results_chr('Properties', 'IdIFES')),
  home_abbr = map_pluck_results_chr('Home', 'IdCountry'),
  away_abbr = map_pluck_results_chr('Away', 'IdCountry')
)
#> # A tibble: 64 × 4
#>     match_id results_id home_abbr away_abbr
#>        <int>      <int> <chr>     <chr>    
#>  1 400128082     128084 QAT       ECU      
#>  2 400235458     132997 ENG       IRN      
#>  3 400235449     132996 SEN       NED      
#>  4 400235455     132995 USA       WAL      
#>  5 400235461     133001 ARG       KSA      
#>  6 400235466     132999 DEN       TUN      
#>  7 400235463     133000 MEX       POL      
#>  8 400235470     132998 FRA       AUS      
#>  9 400235481     133003 MAR       CRO      
#> 10 400235476     133005 GER       JPN      
#> # … with 54 more rows
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment