Skip to content

Instantly share code, notes, and snippets.

@sakamer71
Created April 18, 2022 22:31
Show Gist options
  • Save sakamer71/f3415a952b67b606ac36ebe43caca15a to your computer and use it in GitHub Desktop.
Save sakamer71/f3415a952b67b606ac36ebe43caca15a to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
from pprint import pprint
#import re
import pandas as pd
# Root of the stats site; player-page stubs from the index are appended to this.
baseurl = 'https://www.pro-football-reference.com'
# Season to scrape.
year = 2021
# Output CSV: fantasy stats merged with per-player bio data.
playerDataFile = f"playerData_{year}.csv"
#ffFile = 'fffile.csv'
def getPlayerInfo(name, url, stub):
    """Scrape one player's bio page and return his basic attributes.

    Args:
        name: Player display name (passed through into the result tuple).
        url: Site base URL, e.g. 'https://www.pro-football-reference.com'.
        stub: Player page path appended to ``url``,
            e.g. '/players/T/TaylJo02.htm'.

    Returns:
        Tuple ``(name, height, weight, team, position, birthDate, college)``.
        Any field whose markup is missing falls back to 'unknown'
        ('NA' for team and college), matching the original behavior.
    """
    url += stub
    # timeout so a dead connection cannot hang the whole scrape
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'html.parser')

    def _scrape(getter, fallback):
        # Evaluate one scraping expression. Missing markup surfaces as
        # AttributeError/TypeError/IndexError/KeyError; catching only those
        # (instead of a bare except) keeps real bugs and Ctrl-C visible.
        try:
            return getter()
        except (AttributeError, TypeError, IndexError, KeyError):
            return fallback

    # Position is embedded in the meta Description, e.g. "Position: QB, ..."
    position = _scrape(
        lambda: soup.find("meta", {"name": "Description"})["content"]
                    .split(',')[0].split(':')[-1].strip(),
        "unknown")
    height = _scrape(lambda: soup.find("span", itemprop="height").text.strip(), "unknown")
    weight = _scrape(lambda: soup.find("span", itemprop="weight").text.strip(), "unknown")
    team = _scrape(lambda: soup.find("span", itemprop="affiliation").text.strip(), "NA")
    birthDate = _scrape(lambda: soup.find("span", itemprop="birthDate").text.strip(), "unknown")
    # The college name is the second sibling node after the "College" label.
    college = _scrape(
        lambda: list(soup.find("strong", text="College").next_siblings)[1].text.strip(),
        "NA")
    print(name, height, weight, team, position, birthDate, college)
    return (name, height, weight, team, position, birthDate, college)
def renameColumns(df):
    """Relabel the scraped fantasy table with flat, readable column names.

    Mutates *df* in place (assigns ``df.columns``) and returns it, so it
    works both as an in-place rename and in assignment style.
    """
    identity = ['OverallRank', 'Name', 'Team', 'Position', 'Age',
                'Games', 'GamesStarted']
    passing = ['PassingCmp', 'PassingAtt', 'PassingYds', 'PassingTD',
               'PassingInt']
    rushing = ['RushingAtt', 'RushingYds', 'RushingYPA', 'RushingTD']
    receiving = ['ReceivingTgt', 'ReceivingRec', 'ReceivingYds',
                 'ReceivingYPR', 'ReceivingTD']
    misc = ['Fumbles', 'FumblesLost', 'ScoringTD', 'Scoring2PM',
            'Scoring2PP']
    fantasy = ['FantasyFantPt', 'FantasyPPR', 'FantasyDKPt', 'FantasyFDPt',
               'FantasyVBD', 'FantasyPosRank', 'FantasyOvRank']
    df.columns = identity + passing + rushing + receiving + misc + fantasy
    return df
def main():
    """Scrape the season fantasy table, enrich each row with player bio
    data, and write the merged result to ``playerDataFile``.
    """
    playerData = []
    url = baseurl + '/years/' + str(year) + '/fantasy.htm'
    # Fetch the page once and reuse the response for both the stats table
    # and the link scraping (the original fetched the same URL twice).
    r = requests.get(url, timeout=30)
    ff_df = pd.read_html(r.text)[0]
    ff_df = renameColumns(ff_df)
    print(ff_df.head(5))
    # Strip the '*' (Pro Bowl) and '+' (All-Pro) markers from player names.
    # regex=False pins literal matching: with regex=True the pattern '*'
    # is an invalid regular expression, and the pandas default has changed
    # across versions.
    ff_df['Name'] = (ff_df['Name']
                     .str.replace('*', '', regex=False)
                     .str.replace('+', '', regex=False))
    print(ff_df.head(5))
    print(ff_df.columns)
    soup = BeautifulSoup(r.content, 'html.parser')
    parsed_table = soup.find_all('table')[0]
    # Skip the two header rows, then pull each player's name and page stub.
    # (The original enumerate index was unused and later shadowed.)
    for row in parsed_table.find_all('tr')[2:]:
        dat = row.find('td', attrs={'data-stat': 'player'})
        if dat:
            name = dat.a.get_text()
            stub = dat.a.get('href')
            playerData.append(getPlayerInfo(name, baseurl, stub))
    df = pd.DataFrame(
        playerData,
        columns=["Name", "Height", "Weight", "Team", "Position",
                 "BirthDate", "College"])
    # Left-join bio data onto the stats; duplicate join columns get the
    # '_remove' suffix and are dropped below.
    merged = pd.merge(df, ff_df, how='left', on='Name',
                      suffixes=('', '_remove'))
    merged['Height'] = merged['Height'].astype('string')
    merged.drop([col for col in merged.columns if '_remove' in col],
                axis=1, inplace=True)
    merged.to_csv(playerDataFile)
# Entry point: run the scraper only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment