@sertalpbilal
Last active June 15, 2023 16:05
FPL League Data Download
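# Downloads team info, transfers, season history, and gameweek picks for every
# entry in an FPL classic league via the official API, then writes a
# multi-sheet Excel report.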
import requests
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from itertools import repeat
import json
import pandas as pd
from dateutil import parser
import time
import random
import string
import pathlib

# Season and output folder for cached data and reports
season = '2022-2023'
target_folder = pathlib.Path(f"data/{season}/")
target_folder.mkdir(exist_ok=True, parents=True)

# FPL API endpoint templates
main_page = 'https://fantasy.premierleague.com/api/bootstrap-static/'
league_page = 'https://fantasy.premierleague.com/api/leagues-classic/{league_id}/standings/?page_standings={page_no}'
team_info_page = 'https://fantasy.premierleague.com/api/entry/{team_id}/'
team_tr_page = 'https://fantasy.premierleague.com/api/entry/{team_id}/transfers/'
team_history_page = 'https://fantasy.premierleague.com/api/entry/{team_id}/history/'
team_gw_page = 'https://fantasy.premierleague.com/api/entry/{team_id}/event/{gw}/picks/'

# Human-readable names for chips as reported by the API
chip_names = {'wildcard': 'Wildcard', '3xc': 'Triple Captain', 'freehit': 'Free Hit', 'bboost': 'Bench Boost'}


def get_random_id(n):
    # Random alphanumeric suffix used when naming custom league reports
    return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(n))
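

# Builds the full report for one league: loads (or downloads and caches) the
# raw league data, then writes one Excel sheet each for teams, GW results,
# overall-rank progression, GW ranks, transfers, past-season ranks, and picks.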
def generate_league_report(league_id, custom=False, custom_ids=None, custom_name='', nteams=None, max_teams=250):
    main_data = requests.get(main_page).json()
    pts_data = get_points_data()
    elements = main_data['elements']
    element_dict = {el['id']: el for el in elements}
    gw_info = {event['id']: event for event in main_data['events']}
    # Reuse the cached league data if it exists; otherwise download and cache it
    try:
        with open(target_folder / f'league_data_{league_id}.json', 'r') as f:
            league_data = json.load(f)
    except (OSError, json.JSONDecodeError):
        if custom:
            league_data = read_custom_league(custom_ids, custom_name, forced=True)
        else:
            league_data = read_league(league_id, forced=False, nteams=nteams, max_teams=max_teams)
        # For debug
        # with open(f'league_data_{league_id}.txt', 'w', encoding='utf-8') as f:
        #     f.write(str(league_data))
        with open(target_folder / f'league_data_{league_id}.json', 'w') as f:
            json.dump(league_data, f, indent=2)
    team_data_dict = {t['info']['id']: t for t in league_data['data']}
    # page 1: teams
    teams_basic = league_data['teams']
    print("Number of Teams", len(teams_basic))
    corr_team_data = {t['info']['id']: t['info'] for t in league_data['data']}
    # Drop entries without detailed data (e.g. teams that joined after GW1)
    pb_ids = []
    for t in teams_basic:
        try:
            t.update(corr_team_data[t['entry']])
        except KeyError:
            pb_ids.append(t['entry'])
    teams_basic = [t for t in teams_basic if t['entry'] not in pb_ids]
    teams_df = pd.DataFrame(teams_basic)
    team_key_cols = ['rank', 'player_name', 'entry_name', 'total', 'entry', 'joined_time', 'player_region_name', 'summary_overall_points', 'summary_overall_rank', 'last_deadline_total_transfers']
    teams_df = teams_df[team_key_cols]
    # page 2: gw results
    gw_results = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        team_chips = {c['event']: chip_names[c['name']] for c in team_entry['history']['chips']}
        for gw_result in team_entry['history']['current']:
            gw_results.append({
                'entry': team_id,
                'player_name': t['player_name'],
                'gw': gw_result['event'], 'points': gw_result['points'],
                'net_points': gw_result['points'] - gw_result['event_transfers_cost'], 'total_points': gw_result['total_points'],
                'gw_rank': gw_result['rank'], 'overall_rank': gw_result['overall_rank'], 'bank': gw_result['bank'],
                'gw_hit_cost': gw_result['event_transfers_cost'], 'transfers': gw_result['event_transfers'],
                'points_on_bench': gw_result['points_on_bench'], 'chip': team_chips.get(gw_result['event'], '')
            })
    gw_result_df = pd.DataFrame(gw_results)
    # page 3: team/gw overall rank history
    team_or = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        gw_vals = {}
        for gw_result in team_entry['history']['current']:
            gw_vals[f'GW{gw_result["event"]}'] = gw_result['overall_rank']
        entry = {'entry': team_id, 'entry_name': t['entry_name'], 'player_name': t['player_name']}
        entry.update(gw_vals)
        team_or.append(entry)
    team_or_df = pd.DataFrame(team_or)
    # page 4: team/gw rank history
    team_gw = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        gw_vals = {}
        for gw_result in team_entry['history']['current']:
            gw_vals[f'GW{gw_result["event"]}'] = gw_result['rank']
        entry = {'entry': team_id, 'entry_name': t['entry_name'], 'player_name': t['player_name']}
        entry.update(gw_vals)
        team_gw.append(entry)
    team_gw_df = pd.DataFrame(team_gw)
    # page 5: transfers
    transfers = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        team_transfers = team_entry['transfers']
        team_chips = {c['event']: chip_names[c['name']] for c in team_entry['history']['chips']}
        for transfer in team_transfers:
            deadline = parser.parse(gw_info[transfer['event']]['deadline_time'])
            tr_time = parser.parse(transfer['time'])
            transfers.append({
                'entry': team_id,
                'entry_name': t['entry_name'],
                'player_name': t['player_name'],
                'gw': transfer['event'],
                'sold_id': transfer['element_out'],
                'sold': element_dict[transfer['element_out']]['web_name'],
                'sold_cost': transfer['element_out_cost'] / 10,  # API prices are in tenths of a million
                'bought_id': transfer['element_in'],
                'bought': element_dict[transfer['element_in']]['web_name'],
                'bought_cost': transfer['element_in_cost'] / 10,
                'time': transfer['time'],
                'gw_deadline': gw_info[transfer['event']]['deadline_time'],
                'time_before_deadline': deadline - tr_time,
                'seconds_before_deadline': (deadline - tr_time).total_seconds(),
                'chip': team_chips.get(transfer['event'], '')
            })
    transfers_df = pd.DataFrame(transfers)
    # page 6: rank history
    ranks = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        for h in team_entry['history']['past']:
            ranks.append({
                'entry': team_id,
                'entry_name': t['entry_name'],
                'player_name': t['player_name'],
                'season': h['season_name'],
                'rank': h['rank'],
                'points': h['total_points']
            })
        # ranks.append({
        #     'entry': team_id,
        #     'entry_name': t['entry_name'],
        #     'player_name': t['player_name'],
        #     'season': '2021/22',
        #     'rank': team_entry['history']['current'][-1]['overall_rank'],
        #     'points': team_entry['history']['current'][-1]['total_points']
        # })
    ranks_df = pd.DataFrame(ranks)
    # page 7: player picks
    picks_data = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        s_picks = team_entry['picks']
        team_chips = {c['event']: chip_names[c['name']] for c in team_entry['history']['chips']}
        for w in s_picks:
            try:
                event = w['entry_history']['event']
            except (KeyError, TypeError):
                # Gameweeks not yet played have no entry_history
                break
            for p in w['picks']:
                pts = pts_data[event].get(p['element'], 0)
                picks_data.append({
                    'entry': team_id,
                    'entry_name': t['entry_name'],
                    'player_name': t['player_name'],
                    'gw': event,
                    'chip': team_chips.get(event, ''),
                    'order': p['position'],
                    'pick_id': p['element'],
                    'pick_name': element_dict[p['element']]['web_name'],
                    'multiplier': p['multiplier'],
                    'cap': 1 if p['is_captain'] else 0,
                    'vice_cap': 1 if p['is_vice_captain'] else 0,
                    'raw_pts': pts,
                    'net_pts': p['multiplier'] * pts
                })
    picks_df = pd.DataFrame(picks_data)
    # Write all sheets into a single workbook
    with pd.ExcelWriter(target_folder / f"league_{league_id}_{custom_name}.xlsx") as writer:
        teams_df.to_excel(writer, sheet_name='Teams', index=False)
        gw_result_df.to_excel(writer, sheet_name='GW Results', index=False)
        team_or_df.to_excel(writer, sheet_name='Team OR Progression', index=False)
        team_gw_df.to_excel(writer, sheet_name='Team GW Ranks', index=False)
        transfers_df.to_excel(writer, sheet_name='Transfers', index=False)
        ranks_df.to_excel(writer, sheet_name='Rank History', index=False)
        picks_df.to_excel(writer, sheet_name='Picks', index=False)
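

# Fetches the classic-league standings page by page (the API returns 50
# entries per page) and then downloads the full data for each team found.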
def read_league(league_id, forced=False, nteams=None, max_teams=None):
    teams = []
    has_next = True
    page_no = 1
    while has_next and (nteams is None or len(teams) <= nteams):
        page = league_page.format(league_id=league_id, page_no=page_no)
        response = requests.get(page)
        res_json = response.json()
        if page_no == 1:
            league_name = res_json['league']['name']
        has_next = res_json['standings']['has_next']
        page_no += 1
        teams += [team for team in res_json['standings']['results']]
    if max_teams is not None and len(teams) > max_teams:
        teams = teams[:max_teams]
    team_ids = [t['entry'] for t in teams if t is not None]
    responses = fetch_data_from_ids(team_ids, league_name, forced)
    return {'id': league_id, 'name': league_name, 'team_ids': team_ids, 'teams': teams, 'data': responses}


def read_custom_league(raw_team_ids, name, forced=False):
    teams = []
    team_ids = []
    for team in raw_team_ids:
        try:
            team_json = requests.get(team_info_page.format(team_id=team)).json()
            teams.append({'id': team_json['id'], 'rank': team_json['summary_overall_points'], 'entry': team, 'total': team_json['summary_overall_points'], 'player_name': team_json['player_first_name'] + ' ' + team_json['player_last_name'], 'entry_name': team_json['name']})
            team_ids.append(team)
        except Exception:
            # Skip IDs that cannot be fetched or lack the expected fields
            continue
    responses = fetch_data_from_ids(team_ids, name, forced)
    return {'id': name, 'name': name, 'team_ids': team_ids, 'teams': teams, 'data': responses}


def fetch_data_from_ids(team_ids, league_name, forced=False):
    league_size = len(team_ids)
    if not forced and league_size > 500:
        raise RuntimeError("League has more than 500 entries, this might take a while to parse... Set forced=True if you want to proceed.")
    print(f'League ({league_name}) has {league_size} teams')
    # Download each team's full data in parallel
    with ProcessPoolExecutor(max_workers=8) as executor:
        responses = executor.map(get_team_details, team_ids)
        responses = [r for r in responses if r is not None]
    return responses
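

# Downloads a single team's info, transfers, season history, and per-GW picks.
# Returns None if the initial info request fails.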
def get_team_details(team):
    try:
        team_json = requests.get(team_info_page.format(team_id=team)).json()
    except Exception:
        return None
    t_info = team_json
    del t_info['leagues']  # league memberships are large and not needed for the report
    print("\tInfo")
    tr_json = requests.get(team_tr_page.format(team_id=team)).json()
    t_transfers = tr_json
    print("\tTransfers")
    history_json = requests.get(team_history_page.format(team_id=team)).json()
    t_history = history_json
    print("\tHistory")
    print("\tGW Picks")
    gws = list(range(1, 39))
    # Fetch picks for all 38 gameweeks with a small thread pool
    with ThreadPoolExecutor(max_workers=2) as executor:
        response = executor.map(get_team_gw_picks, repeat(team), gws)
        t_picks = [r for r in response if r is not None]
    team_details = {'info': t_info, 'transfers': t_transfers, 'history': t_history, 'picks': t_picks}
    # print(team_details)
    time.sleep(1)  # be polite to the API between teams
    return team_details


def get_team_gw_picks(team, gw):
    try:
        gw_json = requests.get(team_gw_page.format(team_id=team, gw=gw)).json()
        time.sleep(0.1)  # brief pause between pick requests
        return gw_json
    except Exception:
        return None


def get_points_data():
    # Use the cached live-points file if present; otherwise download all 38 gameweeks
    try:
        with open(target_folder / 'rp.json') as f:
            data = json.load(f)
        data = {int(k): v for (k, v) in data.items()}
    except (OSError, json.JSONDecodeError):
        data = {}
        for gw in range(1, 39):
            gw_response = requests.get(f'https://fantasy.premierleague.com/api/event/{gw}/live/')
            data[gw] = gw_response.json()['elements']
        with open(target_folder / 'rp.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
    # Total points per player per gameweek, summed across all fixtures ('explain' entries)
    rp_data = {}
    for gw in range(1, 39):
        gw_data = data[gw]
        gw_entry = rp_data[gw] = {}
        for e in gw_data:
            gw_entry[e['id']] = sum(stat['points'] for game in e['explain'] for stat in game['stats'])
    return rp_data


def read_fplresearch(size=100):
    table = pd.read_excel(target_folder / 'fplresearch.xlsx', skiprows=3, nrows=size, index_col=None)
    team_ids = table.head(size)['Team ID'].astype(int).to_list()
    generate_league_report(league_id=f'fplresearch{size}', custom=True, custom_ids=team_ids, custom_name=f'fplresearch{size}')


def read_fplreview(size=250, sheet='MassiveData'):
    table = pd.read_excel(target_folder / 'fplreview.xlsx', skiprows=3, nrows=size, index_col=None, sheet_name=sheet)
    team_ids = table.head(size)['TeamID'].astype(int).to_list()
    generate_league_report(league_id=f'fplreview{size}-{sheet}', custom=True, custom_ids=team_ids, custom_name=f'fplreview{size}-{sheet}')


def read_random_teams_or(size=10, within=1000000, given_ranks=None):
    if given_ranks is not None:
        ranks = given_ranks
    else:
        ranks = random.sample(range(1, within + 1), size)
    print("Ranks", ranks)
    # The overall league (id 314) shows 50 entries per standings page,
    # so map each overall rank to its page and position on that page
    urls = []
    orders = []
    for rank in ranks:
        page = ((rank - 1) // 50) + 1
        order = (rank - 1) % 50
        urls.append(f"https://fantasy.premierleague.com/api/leagues-classic/314/standings/?page_standings={page}")
        orders.append(order)
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = list(executor.map(requests.get, urls))
    res_json = [r.json() for r in results if r is not None]
    vals = list(zip(res_json, orders))
    team_ids = [i[0]['standings']['results'][i[1]]['entry'] for i in vals]
    print(team_ids)
    rand_name = get_random_id(5)
    generate_league_report(league_id=f'random{size}_{rand_name}', custom=True, custom_ids=team_ids, custom_name=f'random{size}_{rand_name}')


def get_single_team_data(team_id):
    team_ids = [team_id]
    generate_league_report(league_id=f'single_{team_id}', custom=True, custom_ids=team_ids, custom_name=f'single_{team_id}')


if __name__ == "__main__":
    # Elite 64 Analytics Div 1
    generate_league_report(935263, custom_name='ea64div1', max_teams=250)
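
    # Other entry points, shown here as commented-out examples; the numeric
    # IDs below are illustrative placeholders, not real entries.
    # get_single_team_data(123456)
    # read_random_teams_or(size=10, within=100000)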

@asfoury commented Jun 15, 2023

The script does not work on leagues containing players who joined after the first GW had passed. I did not spend much time trying to understand why, but I was able to get it working by adding this code at line 58:

pb_ids = []
for t in teams_basic:
    try:
        t.update(corr_team_data[t['entry']])
    except KeyError:
        pb_ids.append(t['entry'])

and this before line 66:
teams_basic = [t for t in teams_basic if t['entry'] not in pb_ids]

@sertalpbilal (Author) commented Jun 15, 2023

Thanks @asfoury, I have added your changes.
Can you provide the league ID you tried? I can write a proper fix.

@asfoury commented Jun 15, 2023

My pleasure! Sure, here is the league ID: 431488
