Skip to content

Instantly share code, notes, and snippets.

@sertalpbilal
Created October 13, 2022 15:34
Show Gist options
  • Save sertalpbilal/a8273d63e55a328220adf2b75027194f to your computer and use it in GitHub Desktop.
Save sertalpbilal/a8273d63e55a328220adf2b75027194f to your computer and use it in GitHub Desktop.
FPL Data Prep (Convert any type of prediction data into FPLReview format)
import pandas as pd
import pathlib
import os
import json
from unicodedata import combining, normalize
import requests
from fuzzywuzzy import fuzz
import numpy as np
from abc import ABC, abstractmethod
class DataSource(ABC):
    """Abstract base for a projection source that can be converted to FPLReview format.

    Concrete subclasses implement :meth:`convert`.
    """

    def __init__(self, source):
        """Store the location of the raw input.

        Args:
            source: Where the raw data lives. The subclasses in this file
                hand it to ``pandas.read_csv``.
        """
        self.source = source

    @abstractmethod
    def convert(self, target=None):
        """Convert the source data and optionally write it out.

        Args:
            target: Optional output location. Defaults to ``None`` so the
                base signature agrees with every subclass in this file.
        """
# To remove accents in names
def fix_name_dialect(name):
    """Return ``name`` with accents and diacritics removed.

    The string is NFKD-decomposed and every combining mark is dropped, which
    turns letters such as ``é`` or ``ã`` into their base letter. ``Ø``/``ø``
    have no decomposition, so they are replaced explicitly.

    Args:
        name: The text to clean; must be a ``str``.

    Returns:
        The cleaned string.
    """
    decomposed = normalize('NFKD', name)
    base_letters = ''.join(c for c in decomposed if not combining(c))
    return base_letters.replace('Ø', 'O').replace('ø', 'o')
def get_best_score(r):
    """Return the higher of a candidate's two name-match scores.

    Args:
        r: Mapping with ``'wn_score'`` and ``'cn_score'`` entries.

    Returns:
        The larger of the two scores. Used in this file as the ``key`` for
        ``max()`` when picking the best candidate.
    """
    return max(r['wn_score'], r['cn_score'])
class Review(DataSource):
    """Data source that is read from CSV and passed through without renaming."""

    def __init__(self, source):
        super().__init__(source)

    def convert(self, target=None):
        """Load the source CSV and optionally write it back out.

        Args:
            target: Optional output path; when truthy the frame is written
                there with ``DataFrame.to_csv``.

        Returns:
            The loaded ``DataFrame``, indexed by the first CSV column.
        """
        # The path must be passed explicitly; read_csv has no default for it.
        df = pd.read_csv(self.source, index_col=0)
        if target:
            df.to_csv(target)
        return df
class Mikkel(DataSource):
    """Convert a CSV export of Mikkel's projections into FPLReview format.

    The input carries no FPL IDs. Each row is matched to a player from the
    FPL ``bootstrap-static`` feed by team, position and fuzzy name score.
    """

    STATIC_URL = 'https://fantasy.premierleague.com/api/bootstrap-static/'

    def __init__(self, source):
        super().__init__(source)

    @classmethod
    def _fetch_static(cls):
        """Download and decode the FPL bootstrap-static payload."""
        # A timeout keeps a stalled connection from hanging the conversion.
        response = requests.get(cls.STATIC_URL, timeout=30)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _to_float(series):
        """Parse a column of padded numeric text into floats.

        String cells are stripped and a lone ``'-'`` becomes 0. Cells that
        still cannot be parsed become NaN instead of aborting the column.
        """
        def clean(value):
            if isinstance(value, str):
                value = value.strip()
                return 0 if value == '-' else value
            return value

        return pd.to_numeric(series.map(clean), errors='coerce').astype(float)

    # To add FPL ID column to Mikkel's data and clean empty rows
    def fix_mikkel(self, static=None):
        """Clean the raw CSV and attach an ``'FPL ID'`` column.

        Args:
            static: Optional, already-downloaded bootstrap-static payload
                (a dict with ``'elements'`` and ``'teams'``). Fetched from
                the API when omitted.

        Returns:
            A ``DataFrame`` holding the matched input rows plus one stub row
            for every FPL player that was not matched, with NaN filled by 0.
            Input rows with no candidate of the same team and position are
            reported on stdout and dropped.
        """
        if static is None:
            static = self._fetch_static()
        players = static['elements']
        teams = static['teams']

        df = pd.read_csv(self.source, encoding='latin1')

        # FPL short name -> short name used on the candidate side of the match.
        mikkel_team_dict = {
            'BHA': 'BRI',
            'CRY': 'CPL',
            'NFO': 'NOT',
            'SOU': 'SOT',
            'WHU': 'WHM',
        }
        # Kept in a separate dict so the caller's payload is not mutated.
        team_short_by_code = {
            t['code']: mikkel_team_dict.get(t['short_name'], t['short_name'])
            for t in teams
        }

        df['BCV_numeric'] = pd.to_numeric(df[' BCV '], errors='coerce')
        invalid = (
            (df['Player'] == '0')
            | df['Player'].isnull()
            | df['No.'].isnull()
            | df['BCV_numeric'].isnull()
        )
        df_cleaned = df[~invalid].copy()
        print(len(df), len(df_cleaned))

        df_cleaned['Clean_Name'] = df_cleaned['Player'].apply(fix_name_dialect)
        df_cleaned['Team'] = df_cleaned['Team'].replace({'WHU': 'WHM'})
        df_cleaned['Position'] = df_cleaned['Position'].replace({'GK': 'G'})
        df_cleaned['name_team'] = df_cleaned['Player'] + ' @ ' + df_cleaned['Team']

        element_type_dict = {1: 'G', 2: 'D', 3: 'M', 4: 'F'}
        candidates = [{
            'id': e['id'],
            'web_name': e['web_name'],
            'combined': e['first_name'] + ' ' + e['second_name'],
            'team': team_short_by_code[e['team_code']],
            'position': element_type_dict[e['element_type']],
            'wn': fix_name_dialect(e['web_name']),
            'cn': fix_name_dialect(e['first_name'] + ' ' + e['second_name']),
        } for e in players]

        entry_dict = {}
        for _, player in df_cleaned.iterrows():
            possible_matches = [
                c for c in candidates
                if c['team'] == player['Team'] and c['position'] == player['Position']
            ]
            if not possible_matches:
                # max() on an empty list would raise; report and skip instead.
                print("No matches for ", player['Player'], player['Team'], player['Position'])
                continue
            cleaned_name = player['Clean_Name']
            for candidate in possible_matches:
                candidate['wn_score'] = fuzz.token_set_ratio(cleaned_name, candidate['wn'])
                candidate['cn_score'] = fuzz.token_set_ratio(cleaned_name, candidate['cn'])
            best_match = max(possible_matches, key=get_best_score)
            entry_dict[player['name_team']] = best_match['id']

        df_cleaned['FPL ID'] = df_cleaned['name_team'].map(entry_dict)
        # Rows that could not be matched have no usable ID.
        df_cleaned = df_cleaned.dropna(subset=['FPL ID'])

        existing_ids = set(df_cleaned['FPL ID'])
        missing_players = [{
            'Position': element_type_dict[p['element_type']],
            'Player': p['web_name'],
            ' Price ': p['now_cost'] / 10,
            'FPL ID': p['id'],
            ' Weighted minutes ': 0,
        } for p in players if p['id'] not in existing_ids]

        df_full = pd.concat([df_cleaned, pd.DataFrame(missing_players)]).fillna(0)
        return df_full

    # To convert cleaned Mikkel data into Review format
    def convert(self, target=None):
        """Build the FPLReview-style table from the source CSV.

        Args:
            target: Optional output path; when not ``None`` the result is
                written there with ``DataFrame.to_csv``.

        Returns:
            A ``DataFrame`` indexed by ``fpl_id`` with ``review_id``,
            ``Name``, ``Pos``, ``Value``, ``Team`` and one ``<gw>_Pts`` /
            ``<gw>_xMins`` pair per integer-named input column.
        """
        # One download serves both the ID matching and the lookups below.
        static = self._fetch_static()
        raw_data = self.fix_mikkel(static)

        new_names = {c: c.strip() for c in raw_data.columns}
        raw_data.rename(columns=new_names, inplace=True)
        df_clean = raw_data[raw_data['Price'] < 20].copy()

        df_clean['review_id'] = df_clean['FPL ID'].astype(int)
        df_clean['Pos'] = df_clean['Position'].replace({'GK': 'G'})

        # Expected minutes are identical for every gameweek, so compute once.
        weighted = df_clean['Weighted minutes'].fillna('90')
        xmins = self._to_float(weighted).fillna(0)
        # Goalkeepers and defenders are always given the full 90 minutes.
        xmins[df_clean['Pos'].isin(['G', 'D']).to_numpy()] = 90.0

        # Columns whose (stripped) name is an integer are the gameweek columns.
        gws = []
        for col in df_clean.columns:
            try:
                int(col)
            except (TypeError, ValueError):
                continue
            gws.append(col)

        for gw in gws:
            df_clean[f'{gw}_Pts'] = self._to_float(df_clean[gw])
            df_clean[f'{gw}_xMins'] = xmins

        df_clean['Name'] = df_clean['Player']
        df_clean['Value'] = df_clean['Price']
        projection_cols = [f'{gw}_{tag}' for gw in gws for tag in ['Pts', 'xMins']]
        df_final = df_clean[['review_id', 'Name', 'Pos', 'Value'] + projection_cols].copy()
        df_final.replace({'-': 0}, inplace=True)

        elements_data = static['elements']
        player_names = {e['id']: e['web_name'] for e in elements_data}
        team_dict = {t['code']: t['name'] for t in static['teams']}
        player_teams = {e['id']: team_dict[e['team_code']] for e in elements_data}

        df_final['Team'] = df_final['review_id'].map(player_teams)
        df_final['fpl_id'] = df_final['review_id']
        # IDs missing from the feed keep the ID itself as the name.
        df_final['Name'] = df_final['review_id'].map(lambda pid: player_names.get(pid, pid))
        df_final.set_index('fpl_id', inplace=True)
        df_final.fillna(0, inplace=True)

        if target is not None:
            df_final.to_csv(target)
        return df_final
class Kiwi(DataSource):
    """Convert a Kiwi projections CSV into FPLReview format.

    The layout read here is: five leading info columns (including ``Pos``,
    ``Price`` and ``ID``), a run of integer-named gameweek columns holding
    expected minutes, then an ``xPts`` column followed by the same number of
    gameweek columns holding expected points.
    """

    def __init__(self, source):
        super().__init__(source)

    @staticmethod
    def _is_gameweek(label):
        """Return True when a column label parses as an integer."""
        try:
            int(label)
        except (TypeError, ValueError):
            return False
        return True

    def convert(self, target=None):
        """Build the FPLReview-style table from the source CSV.

        Args:
            target: Optional output path; when truthy the result is written
                there with ``DataFrame.to_csv``.

        Returns:
            A ``DataFrame`` sorted by player ID with a fresh 0..n-1 index,
            containing the info columns, ``Value``, ``Name``, ``id`` and the
            ``<gw>_xMins`` / ``<gw>_Pts`` columns.

        Raises:
            ValueError: If no integer-named column or no ``xPts`` column
                exists in the input.
        """
        df = pd.read_csv(self.source, index_col=0)
        columns = list(df.columns)

        pos = next((i for i, c in enumerate(columns) if self._is_gameweek(c)), None)
        if pos is None:
            raise ValueError("No gameweek columns found in Kiwi data")

        # Count the contiguous gameweek columns instead of assuming the file
        # always runs through gameweek 38; a shorter file would otherwise
        # pull the xPts columns into the minutes block.
        col_count = 0
        while pos + col_count < len(columns) and self._is_gameweek(columns[pos + col_count]):
            col_count += 1

        main_values = df.iloc[:, 0:5].copy()
        main_values['Pos'] = main_values['Pos'].str[0]
        main_values['Value'] = main_values['Price']

        xmin_values = df.iloc[:, pos:pos + col_count].add_suffix("_xMins")

        xp_start = columns.index('xPts')
        xp_values = df.iloc[:, xp_start + 1:xp_start + col_count + 1].copy()
        # Keep only the part of each label before the first '.'.
        xp_values.columns = [str(label).split('.')[0] for label in xp_values.columns]
        xp_values = xp_values.add_suffix("_Pts")

        response = requests.get(
            'https://fantasy.premierleague.com/api/bootstrap-static/', timeout=30
        )
        response.raise_for_status()
        elements = response.json()['elements']
        name_dict = {e['id']: e['web_name'] for e in elements}
        main_values['Name'] = main_values['ID'].map(name_dict)

        kiwi_data = pd.concat([main_values, xmin_values, xp_values], axis=1)
        kiwi_data['id'] = kiwi_data['ID']
        kiwi_data = kiwi_data.set_index('ID').sort_index().reset_index(drop=True)

        if target:
            kiwi_data.to_csv(target)
        return kiwi_data
class Scout(DataSource):
    """Convert a Scout projections CSV into FPLReview format.

    Rows are matched to FPL players by team, position and fuzzy name score
    against the FPL ``bootstrap-static`` feed.
    """

    def __init__(self, source):
        super().__init__(source)

    def convert(self, target=None):
        """Build the FPLReview-style table from the source CSV.

        Args:
            target: Optional output path; when truthy the result is written
                there with ``DataFrame.to_csv``.

        Returns:
            A ``DataFrame`` indexed by ``pid`` and sorted by ``review_id``.
            ``GW<n>`` columns are renamed to ``<n>_Pts`` and ``GW<n>_xMin``
            columns to ``<n>_xMins``. Rows without any candidate player are
            reported on stdout and dropped.
        """
        raw_data = pd.read_csv(self.source, index_col=0)

        response = requests.get(
            "https://fantasy.premierleague.com/api/bootstrap-static/", timeout=30
        )
        response.raise_for_status()
        static = response.json()
        players = static['elements']
        teams = static['teams']

        # Team name as written in the input -> FPL short name.
        scout_team_dict = {
            'Man City': 'MCI',
            'Liverpool': 'LIV',
            'Tottenham': 'TOT',
            'Chelsea': 'CHE',
            'Man Utd': 'MUN',
            'Fulham': 'FUL',
            'Crystal Palace': 'CRY',
            'West Ham': 'WHU',
            'Leicester': 'LEI',
            'Arsenal': 'ARS',
            'Brentford': 'BRE',
            'Leeds': 'LEE',
            'Newcastle': 'NEW',
            'Everton': 'EVE',
            'Southampton': 'SOU',
            'Wolves': 'WOL',
            'Brighton': 'BHA',
            'Aston Villa': 'AVL',
            'Bournemouth': 'BOU',
            'Nottingham Forest': 'NFO',
        }
        raw_data['team_short'] = raw_data['Team'].map(scout_team_dict)
        raw_data['clean_name'] = raw_data['Player'].apply(fix_name_dialect)
        raw_data['Pos'] = raw_data['Pos'].replace({"GK": "G"})
        raw_data['name_team'] = raw_data['Player'] + ' @ ' + raw_data['team_short']

        element_type_dict = {1: 'G', 2: 'D', 3: 'M', 4: 'F'}
        team_code_dict = {t['code']: t for t in teams}
        candidates = [{
            'id': e['id'],
            'web_name': e['web_name'],
            'combined': e['first_name'] + ' ' + e['second_name'],
            'team': team_code_dict[e['team_code']]['short_name'],
            'position': element_type_dict[e['element_type']],
            'wn': fix_name_dialect(e['web_name']),
            'cn': fix_name_dialect(e['first_name'] + ' ' + e['second_name']),
        } for e in players]

        entry_dict = {}
        for _, player in raw_data.iterrows():
            possible_matches = [
                c for c in candidates
                if c['team'] == player['team_short'] and c['position'] == player['Pos']
            ]
            if not possible_matches:
                print("No matches for ", player)
                # Nothing to score; max() on an empty list would raise.
                continue
            cleaned_name = player['clean_name']
            for candidate in possible_matches:
                candidate['wn_score'] = fuzz.token_set_ratio(cleaned_name, candidate['wn'])
                candidate['cn_score'] = fuzz.token_set_ratio(cleaned_name, candidate['cn'])
            best_match = max(possible_matches, key=get_best_score)
            entry_dict[player['name_team']] = best_match['id']

        raw_data['review_id'] = raw_data['name_team'].map(entry_dict)
        raw_data = raw_data.dropna(subset=['review_id']).copy()
        raw_data['review_id'] = raw_data['review_id'].astype(int)
        raw_data['Pos'] = raw_data['Pos'].str[0]
        raw_data['Value'] = raw_data['Price'].str.replace('m', '').astype(float)
        raw_data['Name'] = raw_data['Player']
        raw_data['pid'] = raw_data['review_id']

        def scout_fix_col_name(col):
            """Map Scout's gameweek column names onto the Review naming."""
            if 'GW' not in col:
                return col
            if '_' not in col:
                return col.split('GW')[1] + "_Pts"
            if '_xMin' in col:
                renamed = col.split('GW')[1]
                # Avoid producing '_xMinss' if the plural is already present.
                return renamed if renamed.endswith('_xMins') else renamed + 's'
            return col

        key_names = {c: scout_fix_col_name(c) for c in raw_data.keys()}
        raw_data.rename(columns=key_names, inplace=True)
        raw_data.set_index('pid', inplace=True)
        raw_data.sort_values(by='review_id', inplace=True)
        raw_data['id'] = raw_data['review_id']

        if target:
            raw_data.to_csv(target)
        return raw_data
class Fix(DataSource):
    """Convert a "Fix" projections CSV into FPLReview format."""

    # Input column name -> Review column name.
    _RENAMES = {'price': 'Value', 'name': 'Name', 'id': 'fix_id', 'team': 'Team'}

    def __init__(self, source):
        super().__init__(source)

    def convert(self, target=None):
        """Build the FPLReview-style table from the source CSV.

        Args:
            target: Optional output path; when truthy the result is written
                there with ``DataFrame.to_csv``.

        Returns:
            A ``DataFrame`` sorted by ``id`` with ``id``, ``Name``,
            ``Value``, ``Pos``, ``Team``, every ``*_Pts`` column and
            ``1_xMins`` .. ``38_xMins`` (all set to 90).
        """
        raw_data = pd.read_csv(self.source, index_col=0)

        def fffix_fix_col_name(col):
            """Translate one input column name to its Review equivalent."""
            if "pts_" in col:
                return col.replace("pts_", "") + "_Pts"
            return self._RENAMES.get(col, col)

        # The player ID is taken to be the CSV index plus one.
        raw_data['review_id'] = raw_data.index + 1
        # This source carries no minutes, so every gameweek gets 90.
        for gw in range(1, 39):
            raw_data[f'{gw}_xMins'] = 90

        pos_no = {1: 'G', 2: 'D', 3: 'M', 4: 'F'}
        static_url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
        response = requests.get(static_url, timeout=30)
        response.raise_for_status()
        elements_data = response.json()['elements']
        type_dict = {e['id']: pos_no[e['element_type']] for e in elements_data}
        raw_data['Pos'] = raw_data['review_id'].map(type_dict)

        key_names = {c: fffix_fix_col_name(c) for c in raw_data.keys()}
        raw_data.rename(columns=key_names, inplace=True)
        raw_data['id'] = raw_data['review_id'].astype(int)

        pts_columns = [c for c in raw_data.columns if '_Pts' in c]
        xmins_columns = [c for c in raw_data.columns if '_xMins' in c]
        picked_columns = ['id', 'Name', 'Value', 'Pos', 'Team'] + pts_columns + xmins_columns
        summary_data = raw_data[picked_columns].copy()
        summary_data.sort_values(by='id', inplace=True)

        if target:
            summary_data.to_csv(target)
        return summary_data
class Hub(DataSource):
    """Convert a Hub projections CSV into FPLReview format."""

    def __init__(self, source):
        super().__init__(source)

    def convert(self, target=None):
        """Build the FPLReview-style table from the source CSV.

        Prices and team names come from the FPL ``bootstrap-static`` feed,
        keyed by the input's ``fpl_id`` column.

        Args:
            target: Optional output path; when truthy the result is written
                there with ``DataFrame.to_csv``.

        Returns:
            A ``DataFrame`` indexed by ``fpl_id`` and sorted by ``id``, with
            ``_pts`` / ``_xmins`` column suffixes recapitalised and
            ``player`` renamed to ``Name``.
        """
        raw_data = pd.read_csv(self.source, index_col=0)
        raw_data['review_id'] = raw_data['fpl_id']

        pos_no = {1: 'G', 2: 'D', 3: 'M', 4: 'F'}
        raw_data['Pos'] = raw_data['position_id'].map(pos_no)

        static_url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
        response = requests.get(static_url, timeout=30)
        response.raise_for_status()
        static = response.json()
        elements_data = static['elements']
        team_data = static['teams']

        player_prices = {e['id']: e['now_cost'] / 10 for e in elements_data}
        team_dict = {t['code']: t['name'] for t in team_data}
        player_teams = {e['id']: team_dict[e['team_code']] for e in elements_data}
        raw_data['Value'] = raw_data['fpl_id'].map(player_prices)
        raw_data['Team'] = raw_data['fpl_id'].map(player_teams)

        def hub_fix_col_name(col):
            """Translate one input column name to its Review equivalent."""
            if col == 'player':
                return "Name"
            # Replacing is a no-op for names that carry neither suffix.
            return col.replace("_pts", "_Pts").replace("_xmins", "_xMins")

        key_names = {c: hub_fix_col_name(c) for c in raw_data.keys()}
        raw_data.rename(columns=key_names, inplace=True)
        raw_data.set_index('fpl_id', inplace=True)
        raw_data['id'] = raw_data['review_id'].astype(int)
        raw_data.sort_values(by='id', inplace=True)

        if target:
            raw_data.to_csv(target)
        return raw_data
# Script entry point: nothing runs on direct execution; the classes above are
# meant to be imported and used.
if __name__ == "__main__":
    pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment