Last active
June 15, 2023 16:05
-
-
Save sertalpbilal/faf3e50a41d3ae51a2f38baa061df24d to your computer and use it in GitHub Desktop.
FPL League Data Download
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor | |
from itertools import repeat | |
import json | |
import pandas as pd | |
from dateutil import parser | |
import time | |
import random | |
import string | |
import pathlib | |
# Season label; all cached/downloaded artifacts go under data/<season>/.
season = '2022-2023'
target_folder = pathlib.Path(f"data/{season}/")
target_folder.mkdir(exist_ok=True, parents=True)

# FPL API endpoint templates. {team_id}, {gw}, {league_id}, {page_no} are
# filled in with str.format at call sites.
main_page = 'https://fantasy.premierleague.com/api/bootstrap-static/'
league_page = 'https://fantasy.premierleague.com/api/leagues-classic/{league_id}/standings/?page_standings={page_no}'
team_info_page = 'https://fantasy.premierleague.com/api/entry/{team_id}/'
team_tr_page = 'https://fantasy.premierleague.com/api/entry/{team_id}/transfers/'
team_history_page = 'https://fantasy.premierleague.com/api/entry/{team_id}/history/'
team_gw_page = 'https://fantasy.premierleague.com/api/entry/{team_id}/event/{gw}/picks/'

# API chip codes -> human-readable names used in the report sheets.
chip_names = {'wildcard': 'Wildcard', '3xc': 'Triple Captain', 'freehit': 'Free Hit', 'bboost': 'Bench Boost'}
def get_random_id(n):
    """Return a random alphanumeric identifier of length *n*."""
    alphabet = string.ascii_letters + string.digits
    return ''.join(random.choice(alphabet) for _ in range(n))
def generate_league_report(league_id, custom=False, custom_ids=None, custom_name='', nteams=None, max_teams=250):
    """Download all data for an FPL classic league (or a custom id list) and
    write a multi-sheet Excel report into ``target_folder``.

    Args:
        league_id: classic-league id, or an arbitrary label when ``custom`` is True.
        custom: when True, build the "league" from ``custom_ids`` instead of the
            standings endpoint.
        custom_ids: explicit list of team (entry) ids, used when ``custom`` is True.
        custom_name: suffix used in the output workbook file name.
        nteams: stop paging standings once at least this many teams are collected.
        max_teams: hard cap on the number of teams included in the report.
    """
    main_data = requests.get(main_page).json()
    pts_data = get_points_data()
    elements = main_data['elements']
    # Player (element) metadata keyed by element id; gameweek metadata keyed by event id.
    element_dict = {el['id']: el for el in elements}
    gw_info = {event['id']: event for event in main_data['events']}
    try:
        # Reuse a previously cached league snapshot if one exists on disk.
        with open(target_folder / f'league_data_{league_id}.json', 'r') as f:
            league_data = json.load(f)
    except:
        # NOTE(review): bare except — any failure (missing file, decode error,
        # Ctrl-C mid-read) triggers a fresh download; consider narrowing.
        if custom:
            league_data = read_custom_league(custom_ids, custom_name, forced=True)
        else:
            league_data = read_league(league_id, forced=False, nteams=nteams, max_teams=max_teams)
        # For debug
        # with open(f'league_data_{league_id}.txt', 'w', encoding='utf-8') as f:
        #     f.write(str(league_data))
        # Cache the freshly downloaded snapshot for subsequent runs.
        with open(target_folder / f'league_data_{league_id}.json', 'w') as f:
            json.dump(league_data, f, indent=2)
    # Detailed per-team payloads keyed by entry id.
    team_data_dict = {t['info']['id']: t for t in league_data['data']}
    # page 1: teams
    teams_basic = league_data['teams']
    print("Number of Teams", len(teams_basic))
    corr_team_data = {t['info']['id']: t['info'] for t in league_data['data']}
    # Entries with no detailed data (e.g. teams that joined after GW1) are
    # collected in pb_ids and dropped, so the page builders below cannot KeyError.
    pb_ids = []
    for t in teams_basic:
        try:
            t.update(corr_team_data[t['entry']])
        except KeyError:
            pb_ids.append(t['entry'])
    teams_basic = [t for t in teams_basic if t['entry'] not in pb_ids]
    teams_df = pd.DataFrame(teams_basic)
    team_key_cols = ['rank', 'player_name', 'entry_name', 'total', 'entry', 'joined_time', 'player_region_name', 'summary_overall_points', 'summary_overall_rank', 'last_deadline_total_transfers']
    teams_df = teams_df[team_key_cols]
    # page 2: gw results
    gw_results = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        # Gameweek -> chip name played that week (empty string when none).
        team_chips = {c['event']: chip_names[c['name']] for c in team_entry['history']['chips']}
        for gw_result in team_entry['history']['current']:
            gw_results.append({
                'entry': team_id,
                'player_name': t['player_name'],
                'gw': gw_result['event'], 'points': gw_result['points'],
                'net_points': gw_result['points']-gw_result['event_transfers_cost'], 'total_points': gw_result['total_points'],
                'gw_rank': gw_result['rank'], 'overall_rank': gw_result['overall_rank'], 'bank': gw_result['bank'],
                'gw_hit_cost': gw_result['event_transfers_cost'], 'transfers': gw_result['event_transfers'],
                'points_on_bench': gw_result['points_on_bench'], 'chip': team_chips.get(gw_result['event'], '')
            })
    gw_result_df = pd.DataFrame(gw_results)
    # page 3: team/gw overall rank history (one column per gameweek)
    team_or = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        gw_vals = {}
        for gw_result in team_entry['history']['current']:
            gw_vals[f'GW{gw_result["event"]}'] = gw_result['overall_rank']
        entry = {'entry': team_id, 'entry_name': t['entry_name'], 'player_name': t['player_name']}
        entry.update(gw_vals)
        team_or.append(entry)
    team_or_df = pd.DataFrame(team_or)
    # page 4: team/gw gameweek rank history (same layout as page 3, but per-GW rank)
    team_gw = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        gw_vals = {}
        for gw_result in team_entry['history']['current']:
            gw_vals[f'GW{gw_result["event"]}'] = gw_result['rank']
        entry = {'entry': team_id, 'entry_name': t['entry_name'], 'player_name': t['player_name']}
        entry.update(gw_vals)
        team_gw.append(entry)
    team_gw_df = pd.DataFrame(team_gw)
    # page 5: transfers
    transfers = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        team_transfers = team_entry['transfers']
        team_chips = {c['event']: chip_names[c['name']] for c in team_entry['history']['chips']}
        for transfer in team_transfers:
            # Deadline of the transfer's gameweek vs. when the transfer was made.
            deadline = parser.parse(gw_info[transfer['event']]['deadline_time'])
            tr_time = parser.parse(transfer['time'])
            transfers.append({
                'entry': team_id,
                'entry_name': t['entry_name'],
                'player_name': t['player_name'],
                'gw': transfer['event'],
                'sold_id': transfer['element_out'],
                'sold': element_dict[transfer['element_out']]['web_name'],
                'sold_cost': transfer['element_out_cost']/10,  # API prices are in tenths of a million
                'bought_id': transfer['element_in'],
                'bought': element_dict[transfer['element_in']]['web_name'],
                'bought_cost': transfer['element_in_cost']/10,
                'time': transfer['time'],
                'gw_deadline': gw_info[transfer['event']]['deadline_time'],
                'time_before_deadline': deadline-tr_time,
                'seconds_before_deadline': (deadline-tr_time).total_seconds(),
                'chip': team_chips.get(transfer['event'], '')
            })
    transfers_df = pd.DataFrame(transfers)
    # page 6: rank history (past seasons from the entry's history payload)
    ranks = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        for h in team_entry['history']['past']:
            ranks.append({
                'entry': team_id,
                'entry_name': t['entry_name'],
                'player_name': t['player_name'],
                'season': h['season_name'],
                'rank': h['rank'],
                'points': h['total_points']
            })
        # ranks.append({
        #     'entry': team_id,
        #     'entry_name': t['entry_name'],
        #     'player_name': t['player_name'],
        #     'season': '2021/22',
        #     'rank': team_entry['history']['current'][-1]['overall_rank'],
        #     'points': team_entry['history']['current'][-1]['total_points']
        # })
    ranks_df = pd.DataFrame(ranks)
    # page 7: players picks
    picks_data = []
    for t in teams_basic:
        team_id = t['entry']
        team_entry = team_data_dict[team_id]
        s_picks = team_entry['picks']
        team_chips = {c['event']: chip_names[c['name']] for c in team_entry['history']['chips']}
        for w in s_picks:
            try:
                event = w['entry_history']['event']
            except:
                # NOTE(review): bare except — a picks payload without
                # entry_history (e.g. a not-yet-played GW) stops this team's loop.
                break
            for p in w['picks']:
                # Raw points for the picked element in this GW (0 if absent).
                pts = pts_data[event].get(p['element'], 0)
                picks_data.append({
                    'entry': team_id,
                    'entry_name': t['entry_name'],
                    'player_name': t['player_name'],
                    'gw': event,
                    'chip': team_chips.get(event, ''),
                    'order': p['position'],
                    'pick_id': p['element'],
                    'pick_name': element_dict[p['element']]['web_name'],
                    'multiplier': p['multiplier'],
                    'cap': 1 if p['is_captain'] else 0,
                    'vice_cap': 1 if p['is_vice_captain'] else 0,
                    'raw_pts': pts,
                    'net_pts': p['multiplier'] * pts  # 0 for benched, 2x/3x for (triple) captain
                })
    picks_df = pd.DataFrame(picks_data)
    # Write all seven sheets into a single workbook.
    with pd.ExcelWriter(target_folder / f"league_{league_id}_{custom_name}.xlsx") as writer:
        teams_df.to_excel(writer, sheet_name='Teams', index=False)
        gw_result_df.to_excel(writer, sheet_name='GW Results', index=False)
        team_or_df.to_excel(writer, sheet_name='Team OR Progression', index=False)
        team_gw_df.to_excel(writer, sheet_name='Team GW Ranks', index=False)
        transfers_df.to_excel(writer, sheet_name='Transfers', index=False)
        ranks_df.to_excel(writer, sheet_name='Rank History', index=False)
        picks_df.to_excel(writer, sheet_name='Picks', index=False)
def read_league(league_id, forced=False, nteams=None, max_teams=None):
    """Page through a classic league's standings and fetch details for every team.

    Returns a dict with the league id/name, the team id list, the basic
    standings rows, and the detailed per-team payloads.
    """
    standings_rows = []
    more_pages = True
    current_page = 1
    while more_pages and (nteams is None or len(standings_rows) <= nteams):
        url = league_page.format(league_id=league_id, page_no=current_page)
        payload = requests.get(url).json()
        if current_page == 1:
            # League name only needs to be read once.
            league_name = payload['league']['name']
        more_pages = payload['standings']['has_next']
        current_page += 1
        standings_rows.extend(payload['standings']['results'])
    if max_teams is not None and len(standings_rows) > max_teams:
        standings_rows = standings_rows[:max_teams]
    team_ids = [row['entry'] for row in standings_rows if row is not None]
    responses = fetch_data_from_ids(team_ids, league_name, forced)
    return {'id': league_id, 'name': league_name, 'team_ids': team_ids, 'teams': standings_rows, 'data': responses}
def read_custom_league(raw_team_ids, name, forced=False):
    """Build a league-like structure from an explicit list of team (entry) ids.

    Each id is resolved against the entry endpoint; ids whose lookup fails
    (network error, bad JSON, missing keys) are skipped silently so one bad id
    does not abort the whole run.

    Returns the same dict shape as read_league().
    """
    teams = []
    team_ids = []
    for team in raw_team_ids:
        try:
            team_json = requests.get(team_info_page.format(team_id=team)).json()
            # NOTE(review): 'rank' is filled with summary_overall_points here —
            # looks intentional (ranking a custom league by points); confirm.
            teams.append({'id': team_json['id'], 'rank': team_json['summary_overall_points'], 'entry': team, 'total': team_json['summary_overall_points'], 'player_name': team_json['player_first_name'] + ' ' + team_json['player_last_name'], 'entry_name': team_json['name']})
            team_ids.append(team)
        except (requests.RequestException, KeyError, ValueError):
            # Was a bare except: narrowed so Ctrl-C / SystemExit still propagate.
            continue
    responses = fetch_data_from_ids(team_ids, name, forced)
    return {'id': name, 'name': name, 'team_ids': team_ids, 'teams': teams, 'data': responses}
def fetch_data_from_ids(team_ids, league_name, forced=False):
    """Download full details for every team id in parallel.

    Refuses leagues larger than 500 entries unless forced=True. Returns the
    list of successfully fetched team payloads (failed fetches are dropped).
    """
    league_size = len(team_ids)
    if not forced and league_size > 500:
        print("League has more than 500 entries, this might take a while to parse...")
        raise RuntimeError("League has more than 500 entries, this might take a while to parse... Set forced=True if you want to proceed.")
    print(f'League ({league_name}) has {league_size} teams')
    with ProcessPoolExecutor(max_workers=8) as executor:
        fetched = executor.map(get_team_details, team_ids)
        return [payload for payload in fetched if payload is not None]
def get_team_details(team):
    """Download info, transfers, season history, and per-GW picks for one entry.

    Args:
        team: FPL entry (team) id.

    Returns:
        dict with keys 'info', 'transfers', 'history', 'picks', or None when
        the initial info request fails.
    """
    try:
        t_info = requests.get(team_info_page.format(team_id=team)).json()
    except (requests.RequestException, ValueError):
        # Was a bare except: narrowed to network / JSON-decode failures so
        # KeyboardInterrupt still aborts the run. Failed teams are skipped.
        return None
    # Drop the (large) league-membership payload; the report never uses it.
    del t_info['leagues']
    print("\tInfo")
    t_transfers = requests.get(team_tr_page.format(team_id=team)).json()
    print("\tTransfers")
    t_history = requests.get(team_history_page.format(team_id=team)).json()
    print("\tHistory")
    print("\tGW Picks")
    gws = list(range(1, 39))
    # Fetch all 38 gameweek pick pages with a small thread pool; failed GWs
    # come back as None and are filtered out.
    with ThreadPoolExecutor(max_workers=2) as executor:
        response = executor.map(get_team_gw_picks, repeat(team), gws)
        t_picks = [r for r in response if r is not None]
    team_details = {'info': t_info, 'transfers': t_transfers, 'history': t_history, 'picks': t_picks}
    # Be polite to the API between teams.
    time.sleep(1)
    return team_details
def get_team_gw_picks(team, gw):
    """Fetch the squad picks of *team* for gameweek *gw*.

    Returns the decoded JSON payload, or None when the request or JSON
    decoding fails.
    """
    try:
        gw_json = requests.get(team_gw_page.format(team_id=team, gw=gw)).json()
        time.sleep(0.1)  # small delay so parallel fetches don't hammer the API
        return gw_json
    except (requests.RequestException, ValueError):
        # Was a bare except: narrowed so Ctrl-C still aborts the run.
        return None
def get_points_data():
    """Return raw player points per gameweek: {gw: {element_id: points}}.

    Downloads live event data for GWs 1-38 and caches the raw payloads in
    rp.json under ``target_folder``; subsequent calls read the cache.
    """
    try:
        with open(target_folder / 'rp.json') as f:
            data = json.load(f)
        # JSON object keys are strings; restore integer gameweek keys.
        data = {int(k): v for (k, v) in data.items()}
    except (OSError, ValueError):
        # Was a bare except: narrowed to missing/unreadable file and
        # decode/key-conversion errors — either way, rebuild from the API.
        data = {}
        for gw in range(1, 39):
            gw_response = requests.get(f'https://fantasy.premierleague.com/api/event/{gw}/live/')
            data[gw] = gw_response.json()['elements']
        with open(target_folder / 'rp.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
    rp_data = {}
    for gw in range(1, 39):
        gw_entry = rp_data[gw] = {}
        for e in data[gw]:
            # Total points = sum over fixtures ('explain') of each stat's points.
            gw_entry[e['id']] = sum(stat['points'] for game in e['explain'] for stat in game['stats'])
    return rp_data
def read_fplresearch(size=100):
    """Generate a league report for the top *size* team IDs listed in fplresearch.xlsx."""
    # Renamed local from `all` — it shadowed the builtin.
    sheet_df = pd.read_excel(target_folder / 'fplresearch.xlsx', skiprows=3, nrows=size, index_col=None)
    team_ids = sheet_df.head(size)['Team ID'].astype(int).to_list()
    generate_league_report(league_id=f'fplresearch{size}', custom=True, custom_ids=team_ids, custom_name=f'fplresearch{size}')
def read_fplreview(size=250, sheet='MassiveData'):
    """Generate a league report for the top *size* team IDs from an fplreview.xlsx sheet."""
    # Renamed local from `all` — it shadowed the builtin.
    sheet_df = pd.read_excel(target_folder / 'fplreview.xlsx', skiprows=3, nrows=size, index_col=None, sheet_name=sheet)
    team_ids = sheet_df.head(size)['TeamID'].astype(int).to_list()
    generate_league_report(league_id=f'fplreview{size}-{sheet}', custom=True, custom_ids=team_ids, custom_name=f'fplreview{size}-{sheet}')
def read_random_teams_or(size=10, within=1000000, given_ranks=None):
    """Sample teams by overall rank and build a report for them.

    Ranks are resolved against the overall league (id 314): each standings
    page holds 50 entries, so rank -> (page, offset-within-page).
    """
    if given_ranks is not None:
        ranks = given_ranks
    else:
        ranks = random.sample(range(1, within+1), size)
    print("Ranks", ranks)
    urls = []
    orders = []
    for rank in ranks:
        page = ((rank-1)//50)+1
        urls.append(f"https://fantasy.premierleague.com/api/leagues-classic/314/standings/?page_standings={page}")
        orders.append((rank-1) % 50)
    # Fetch all standings pages concurrently.
    with ThreadPoolExecutor(max_workers=10) as executor:
        pages = list(executor.map(requests.get, urls))
    payloads = [page.json() for page in pages if page is not None]
    team_ids = [payload['standings']['results'][offset]['entry'] for payload, offset in zip(payloads, orders)]
    print(team_ids)
    rand_name = get_random_id(5)
    generate_league_report(league_id=f'random{size}_{rand_name}', custom=True, custom_ids=team_ids, custom_name=f'random{size}_{rand_name}')
def get_single_team_data(team_id):
    """Build a one-team report for *team_id* via the custom-league path."""
    generate_league_report(league_id=f'single_{team_id}', custom=True, custom_ids=[team_id], custom_name=f'single_{team_id}')
# Script entry point: build the report for one hard-coded league id.
if __name__ == "__main__":
    # Elite 64 Analytics Div 1
    generate_league_report(935263, custom_name='ea64div1', max_teams=250)
Thanks @asfoury I have added your changes.
Can you provide the league ID you have tried? I can write a proper fix.
My pleasure! Sure here is the league id : 431488
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The script does not work on leagues that contain players who joined after the first GW had passed. I did not spend much time trying to understand why, but I was able to get it working by adding this code:
line 58:
before line 66:
teams_basic = [t for t in teams_basic if t['entry'] not in pb_ids]