Skip to content

Instantly share code, notes, and snippets.

@thejsj
Last active April 20, 2017 06:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thejsj/072b6ed7f0d4a3b9ee02b5c7a4c3e0ae to your computer and use it in GitHub Desktop.
Save thejsj/072b6ed7f0d4a3b9ee02b5c7a4c3e0ae to your computer and use it in GitHub Desktop.
Dani + Python
import requests
import xlwt
import datetime
from bs4 import BeautifulSoup
# Turn lines of code used multiples times into functions
def fetch_soup(uri):
response = requests.get(uri)
# Add "html.paser" so beautiful soup doesn't complain
return BeautifulSoup(response.content, "html.parser")
# Get all players from the soup and form a list of dictionaries
def get_players(players_soup):
players_raw = players_soup.find_all("td", {"data-stat":"player"})
all_players = [] # From an array
for i, html_raw in enumerate(players_raw):
name = html_raw.text.split()
# Form a dictionary for player to keep values together
# instead of having a lot of lists that depend on the index to
# fetch a value
player = {}
player['index'] = i
player['first_name'] = name[0]
player['last_name'] = name[1]
player['link'] = html_raw.find('a').get('href')
# Append the dictionary to the list to keep all players together
all_players.append(player)
# Return a list of players
return all_players
def append_3point_fg_pct(soup, players):
three_point_pct_raw = soup.find_all("td", {"data-stat":"fg3_pct"})
# Enumerate adds a numerical index to every loop iteration (0, 1, 2, ...)
# and makes the behavior similar to a for loop in other languages
# `i` refers to index, which should always be a numerical value refering
# to the place in the array where we are (0, 1, 2,....)
# `val` refers to the actual value (a dictonary) in this case
for i, val in enumerate(three_point_pct_raw):
# Only add something of it exists
if val.text != "" and float(val.text) >= 0.001 :
# Add a new property to this dictionary
players[i]['3point_pct'] = float(val.text)
# return the players list again
return players
# For every player, fetch their NBA stats
def append_nba_attempts(players):
for player in players:
# Use the `link` property from the dictionary to fetch the player stats
player_link_soup = fetch_soup("http://www.basketball-reference.com"+ player['link'])
attempts_per_game = player_link_soup.find_all("td", {"data-stat":"fg3a_per_g"})
games = player_link_soup.find_all("td", {"data-stat":"g"})
# The `attempts_per_game` is the text for the last td found
# [-1] refers to the last element in an array
# After getting the text in the last element, convert it to a float
player['attempts_per_game'] = float(attempts_per_game[-1].text)
player['games'] = float(games[-1].text)
player['attemptstotal'] = player['attempts_per_game'] * player['games']
return players
def append_college_attempts(players):
for player in players:
# Join first and last name to be able to fetch page
player_name = player['first_name'].lower() + "-" + player['last_name'].lower()
player_soup = fetch_soup("http://www.sports-reference.com/cbb/players/" + player_name + "-1.html")
three_pt_fg_pct = player_soup.find_all("td", {"data-stat":"fg3_pct"})
ft_pct = player_soup.find_all("td", {"data-stat":"ft_pct"})
# Only add property to dictionary if the array has more than 1 elments
if len(three_pt_fg_pct) > 0:
player['college_3point_pct'] = three_pt_fg_pct[-1].text
if len(ft_pct) > 0:
player['college_ft_pct'] = ft_pct[-1].text
return players
def add_stats_to_sheet(workbook, stats, year):
sheet = workbook.add_sheet(year)
# stats_arr is a matrix (lists with lists in them) that we will turn into a spreadsheet
for i, stat_arr in enumerate(stats):
# http://stackoverflow.com/questions/522563/accessing-the-index-in-python-for-loops
for ii, val in enumerate(stat_arr):
sheet.write(i, ii, val)
def save_workbook(workbook):
# Save the sheet to the spreadsheet with the current time
dt = datetime.datetime.now()
workbook.save("./2015- " + str(dt.strftime("%s")) + ".xls")
# THIS IS OUR MAN FUNCTION!
def main():
print "Fetching players..."
players_soup = fetch_soup("http://www.basketball-reference.com/draft/NBA_2015.html")
# Create the player dictionary
all_players = get_players(players_soup)
# Append a property to the player dictionary
all_players = append_3point_fg_pct(players_soup, all_players)
# Filter out (remove) all players with not 3 point percentage
all_players = list(filter(lambda x : '3point_pct' in x, all_players))
print "Fetching NBA attempts per player..."
all_players = append_nba_attempts(all_players)
# Filter out (remove) all players with not enough attempts
all_players = list(filter(lambda x : 'attemptstotal' > 40.0, all_players))
print "Fetching college attempts per player..."
all_players = append_college_attempts(all_players)
# Filter out (remove) all players with not college 3 point percentage
all_players = list(filter(lambda x : 'college_3point_pct' in x, all_players))
print "Saving to excel..."
# Transform dictionary into an array to pass to spreadsheet
rows = []
rows.append(['Name', 'NBA 3P%', 'College 3P%', 'College FT%'])
for p in all_players:
rows.append([
p['first_name'] + " " + p['last_name'],
str(p['3point_pct']),
p['college_3point_pct'],
p['college_ft_pct']
])
workbook = xlwt.Workbook()
add_stats_to_sheet(workbook, rows, '2015')
save_workbook(workbook)
print "Done"
# This basically runs our `main` function if this file is not being imported
# by another python file
if __name__ == '__main__':
main()
import requests
from bs4 import BeautifulSoup
import xlwt
r = requests.get("http://www.basketball-reference.com/draft/NBA_2015.html")
soup = BeautifulSoup(r.content)
# No se que son estas varialbes
x=0
y=0
z=0
b=1
u=1
d=0
## aa? Que es esto?
aa = soup.find_all("td", {"data-stat":"player"})
# No se que hacen todas estas variables
new = []
links = []
slowpoke = []
vape = []
detres = []
intentos = []
attempts = []
attemptstotal = []
# Aqui puedes user un map en vez de hacer esto (mas limpio)
for i in aa:
new.append(((i.text).split()))
# Aqui puedes usar (for x in range(60))
while x <= 59:
links.append(aa[x].find_all("a"))
x = x+1
for i in links:
for x in i:
slowpoke.append(x)
# Esto lo podias hacer arriba y no hacerlo aqui
for x in slowpoke:
vape.append((x.get('href')))
for i in soup.find_all("td", {"data-stat":"fg3_pct"}):
# Esto es lo que se llama un filter (mas limpio)
if i.text=="" or float(i.text) < 0.001 :
new.remove(new[y])
vape.remove(vape[y])
else:
detres.append(i.text)
y=y+1
# esto deberia de ser una funcion
for i in vape:
a = requests.get("http://www.basketball-reference.com"+i)
sopa = BeautifulSoup(a.content)
hellothere = sopa.find_all("td", {"data-stat":"fg3a_per_g"})
angel = sopa.find_all("td", {"data-stat":"g"})
# ni entinedo lo que esta pasando aqui
for b in hellothere[len(hellothere)-1]:
intentos.append(b)
for c in angel[len(angel)-1]:
intentos.insert(z,c)
attempts.append(intentos)
intentos=[]
for i in attempts:
u=1
for x in i:
u= float(x)*u
if u > 40:
attemptstotal.append(int(round(u)))
d=d+1
else:
new.remove(new[d])
vape.remove(vape[d])
detres.remove(detres[d])
qcyo = []
noc= []
college3 = []
collegeft = []
l=0
for i in new:
r=requests.get("http://www.sports-reference.com/cbb/players/" +i[0].lower() + "-" + i[1].lower() + "-1.html")
soup = BeautifulSoup(r.content)
qcyo.append(soup.find_all("td", {"data-stat":"fg3_pct"}))
noc.append(soup.find_all("td", {"data-stat":"ft_pct"}))
for b in qcyo:
l=l+1
if b==[]:
new.remove(new[l-1])
vape.remove(vape[l-1])
detres.remove(detres[l-1])
l=l-1
else:
college3.append(b[len(b)-1].text)
for c in noc:
if c!=[]:
collegeft.append(c[len(c)-1].text)
# Esto deberia de ser una funcion
wb=xlwt.Workbook()
ws = wb.add_sheet("2015")
ws.write(0,0,"Name")
ws.write(0,1,"NBA 3P%")
ws.write(0,2,"College 3P%")
ws.write(0,3,"College FT%")
yu=1
for i in college3:
ws.write(yu,2,i)
yu=yu+1
yu=1
for i in collegeft:
ws.write(yu,3,i)
yu=yu+1
yu=1
for i in detres:
ws.write(yu,1,i)
yu=yu+1
yu=1
for i in new:
a=" "
for b in reversed(i):
a=b + " " +a
ws.write(yu,0,a)
yu=yu+1
wb.save("C:/Users/Los/Documents/Danny/Python/2015.xls")
# Pq tienes todo este esapcio debajo????
import requests
from bs4 import BeautifulSoup
import xlwt
r = requests.get("http://www.basketball-reference.com/draft/NBA_2015.html")
soup = BeautifulSoup(r.content)
x=0
y=0
z=0
b=1
u=1
d=0
aa = soup.find_all("td", {"data-stat":"player"})
new = []
links = []
slowpoke = []
vape = []
detres = []
intentos = []
attempts = []
attemptstotal = []
for i in aa:
new.append(((i.text).split()))
while x <= 59:
links.append(aa[x].find_all("a"))
x = x+1
for i in links:
for x in i:
slowpoke.append(x)
for x in slowpoke:
vape.append((x.get('href')))
for i in soup.find_all("td", {"data-stat":"fg3_pct"}):
if i.text=="" or float(i.text) < 0.001 :
new.remove(new[y])
vape.remove(vape[y])
else:
detres.append(i.text)
y=y+1
for i in vape:
a = requests.get("http://www.basketball-reference.com"+i)
sopa = BeautifulSoup(a.content)
hellothere = sopa.find_all("td", {"data-stat":"fg3a_per_g"})
angel = sopa.find_all("td", {"data-stat":"g"})
for b in hellothere[len(hellothere)-1]:
intentos.append(b)
for c in angel[len(angel)-1]:
intentos.insert(z,c)
attempts.append(intentos)
intentos=[]
for i in attempts:
u=1
for x in i:
u= float(x)*u
if u > 40:
attemptstotal.append(int(round(u)))
d=d+1
else:
new.remove(new[d])
vape.remove(vape[d])
detres.remove(detres[d])
qcyo = []
noc= []
college3 = []
collegeft = []
l=0
for i in new:
r=requests.get("http://www.sports-reference.com/cbb/players/" +i[0].lower() + "-" + i[1].lower() + "-1.html")
soup = BeautifulSoup(r.content)
qcyo.append(soup.find_all("td", {"data-stat":"fg3_pct"}))
noc.append(soup.find_all("td", {"data-stat":"ft_pct"}))
for b in qcyo:
l=l+1
if b==[]:
new.remove(new[l-1])
vape.remove(vape[l-1])
detres.remove(detres[l-1])
l=l-1
else:
college3.append(b[len(b)-1].text)
for c in noc:
if c!=[]:
collegeft.append(c[len(c)-1].text)
wb=xlwt.Workbook()
ws = wb.add_sheet("2015")
ws.write(0,0,"Name")
ws.write(0,1,"NBA 3P%")
ws.write(0,2,"College 3P%")
ws.write(0,3,"College FT%")
yu=1
for i in college3:
ws.write(yu,2,i)
yu=yu+1
yu=1
for i in collegeft:
ws.write(yu,3,i)
yu=yu+1
yu=1
for i in detres:
ws.write(yu,1,i)
yu=yu+1
yu=1
for i in new:
a=" "
for b in reversed(i):
a=b + " " +a
ws.write(yu,0,a)
yu=yu+1
wb.save("C:/Users/Los/Documents/Danny/Python/2015.xls")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment