Last active
April 20, 2017 06:19
-
-
Save thejsj/072b6ed7f0d4a3b9ee02b5c7a4c3e0ae to your computer and use it in GitHub Desktop.
Dani + Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import xlwt | |
import datetime | |
from bs4 import BeautifulSoup | |
# Shared helper so the download-and-parse dance isn't repeated everywhere
def fetch_soup(uri):
    """Download `uri` and return its HTML parsed as a BeautifulSoup document."""
    # Naming "html.parser" explicitly keeps BeautifulSoup from warning
    # about having to pick a parser on its own.
    page = requests.get(uri)
    return BeautifulSoup(page.content, "html.parser")
# Turn the draft-page soup into a list of player dictionaries
def get_players(players_soup):
    """Extract every player cell from the soup.

    Returns a list of dicts, each holding the player's position in the
    table ('index'), first and last name, and the relative link to his
    stats page.
    """
    cells = players_soup.find_all("td", {"data-stat": "player"})
    all_players = []
    for position, cell in enumerate(cells):
        parts = cell.text.split()
        # One dictionary per player keeps related values together,
        # instead of spreading them across parallel lists that only line
        # up by index.
        all_players.append({
            'index': position,
            'first_name': parts[0],
            'last_name': parts[1],
            'link': cell.find('a').get('href'),
        })
    return all_players
def append_3point_fg_pct(soup, players):
    """Attach the NBA 3-point percentage to each player dict, in place.

    Cells that are empty or effectively zero are skipped, so only
    players with a real percentage gain the '3point_pct' key. Returns
    the same `players` list.
    """
    cells = soup.find_all("td", {"data-stat": "fg3_pct"})
    # enumerate() pairs every cell with its position, which matches the
    # position of the corresponding dict in `players`.
    for idx, cell in enumerate(cells):
        text = cell.text
        # Only record the stat when the cell actually holds a usable value
        if text != "" and float(text) >= 0.001:
            players[idx]['3point_pct'] = float(text)
    return players
# Fetch NBA shooting stats for every player in the list
def append_nba_attempts(players):
    """Fetch each player's NBA page and record 3PA/game, games, and total attempts.

    Mutates each dict in `players` (adding 'attempts_per_game', 'games'
    and 'attemptstotal') and returns the same list.
    """
    for player in players:
        # The 'link' stored on the dict is the relative path to the
        # player's own stats page.
        stats_soup = fetch_soup("http://www.basketball-reference.com" + player['link'])
        per_game_cells = stats_soup.find_all("td", {"data-stat": "fg3a_per_g"})
        game_cells = stats_soup.find_all("td", {"data-stat": "g"})
        # [-1] picks the last matching cell on the page; its text is
        # converted to a float before being stored.
        player['attempts_per_game'] = float(per_game_cells[-1].text)
        player['games'] = float(game_cells[-1].text)
        player['attemptstotal'] = player['attempts_per_game'] * player['games']
    return players
def append_college_attempts(players):
    """Fetch each player's college page and record 3P% and FT% when present.

    Mutates each dict in `players` (adding 'college_3point_pct' and/or
    'college_ft_pct' only when the page has those cells) and returns the
    same list.
    """
    for player in players:
        # sports-reference college URLs are built as "first-last-1.html"
        player_name = player['first_name'].lower() + "-" + player['last_name'].lower()
        college_soup = fetch_soup("http://www.sports-reference.com/cbb/players/" + player_name + "-1.html")
        fg3_cells = college_soup.find_all("td", {"data-stat": "fg3_pct"})
        ft_cells = college_soup.find_all("td", {"data-stat": "ft_pct"})
        # Only set the keys when at least one matching cell exists; the
        # last cell ([-1]) is the one whose text gets stored.
        if len(fg3_cells) > 0:
            player['college_3point_pct'] = fg3_cells[-1].text
        if len(ft_cells) > 0:
            player['college_ft_pct'] = ft_cells[-1].text
    return players
def add_stats_to_sheet(workbook, stats, year):
    """Write `stats` (a list of row lists) into a new sheet named `year`.

    Cell (row, col) receives stats[row][col]; nothing is returned.
    """
    sheet = workbook.add_sheet(year)
    row_num = 0
    for row in stats:
        col_num = 0
        for cell_value in row:
            sheet.write(row_num, col_num, cell_value)
            col_num += 1
        row_num += 1
def save_workbook(workbook):
    """Save the workbook to "./2015- <epoch seconds>.xls".

    NOTE(review): strftime("%s") is a glibc extension, not a documented
    Python format code — it returns the epoch on Linux/macOS but fails on
    Windows. Left as-is here; consider time.mktime(dt.timetuple()) for a
    portable equivalent.
    """
    now = datetime.datetime.now()
    # Stamp the filename with the current time so runs don't overwrite
    # each other.
    filename = "./2015- " + str(now.strftime("%s")) + ".xls"
    workbook.save(filename)
# THIS IS OUR MAIN FUNCTION!
def main():
    """Scrape the 2015 NBA draft class and save 3P%/FT% stats to a spreadsheet."""
    # print() calls run on both Python 2 and Python 3; the original used
    # Python-2-only print statements.
    print("Fetching players...")
    players_soup = fetch_soup("http://www.basketball-reference.com/draft/NBA_2015.html")
    # Build the list of player dictionaries
    all_players = get_players(players_soup)
    # Attach each player's NBA 3-point percentage
    all_players = append_3point_fg_pct(players_soup, all_players)
    # Filter out (remove) all players with no 3-point percentage
    all_players = list(filter(lambda x: '3point_pct' in x, all_players))
    print("Fetching NBA attempts per player...")
    all_players = append_nba_attempts(all_players)
    # Filter out (remove) all players with not enough attempts.
    # BUG FIX: the original compared the *string* 'attemptstotal' to 40.0
    # (always True on Python 2, a TypeError on Python 3); the intent was
    # to compare the player's stored value.
    all_players = list(filter(lambda x: x['attemptstotal'] > 40.0, all_players))
    print("Fetching college attempts per player...")
    all_players = append_college_attempts(all_players)
    # Filter out (remove) all players with no college 3-point percentage
    all_players = list(filter(lambda x: 'college_3point_pct' in x, all_players))
    print("Saving to excel...")
    # Flatten the dictionaries into rows for the spreadsheet
    rows = []
    rows.append(['Name', 'NBA 3P%', 'College 3P%', 'College FT%'])
    for p in all_players:
        rows.append([
            p['first_name'] + " " + p['last_name'],
            str(p['3point_pct']),
            p['college_3point_pct'],
            # FT% can be missing even when 3P% is present (the keys are set
            # independently in append_college_attempts); default to ""
            # instead of crashing with a KeyError.
            p.get('college_ft_pct', ''),
        ])
    workbook = xlwt.Workbook()
    add_stats_to_sheet(workbook, rows, '2015')
    save_workbook(workbook)
    print("Done")

# This basically runs our `main` function if this file is not being imported
# by another python file
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
from bs4 import BeautifulSoup
import xlwt

# Flat-script (pre-refactor) version of the scraper: downloads the 2015
# NBA draft page, keeps players with real 3-point stats and enough total
# attempts, then writes Name / NBA 3P% / College 3P% / College FT% to an
# Excel sheet.  Inline "(reviewer)" comments below are translated from
# the original Spanish review notes.
r = requests.get("http://www.basketball-reference.com/draft/NBA_2015.html")
soup = BeautifulSoup(r.content)
# (reviewer) I don't know what these variables are
x=0   # counter for the while loop over `aa`
y=0   # running index into `new`/`vape` during the 3P% filter below
z=0   # insert position used while collecting one player's stats
b=1
u=1   # running product (attempts per game * games)
d=0   # running index into the lists during the total-attempts filter
# (reviewer) aa? What is this?
aa = soup.find_all("td", {"data-stat":"player"})  # every player <td> cell
# (reviewer) I don't know what all these variables do
new = []            # [first, last] name word lists, one per player
links = []          # <a> tag result sets found inside each player cell
slowpoke = []       # those same <a> tags flattened into one list
vape = []           # href strings (relative player-page links)
detres = []         # NBA 3P% text for the players that survive the filter
intentos = []       # temp: one player's [games, attempts-per-game] strings
attempts = []       # the `intentos` pair for every fetched player
attemptstotal = []  # rounded total attempts for the surviving players
# (reviewer) You could use a map here instead of doing this (cleaner)
for i in aa:
    new.append(((i.text).split()))
# (reviewer) Here you could use (for x in range(60))
while x <= 59:
    links.append(aa[x].find_all("a"))
    x = x+1
for i in links:
    for x in i:
        slowpoke.append(x)
# (reviewer) You could have done this above instead of doing it here
for x in slowpoke:
    vape.append((x.get('href')))
for i in soup.find_all("td", {"data-stat":"fg3_pct"}):
    # (reviewer) This is what's called a filter (cleaner)
    if i.text=="" or float(i.text) < 0.001 :
        # NOTE(review): removes by *value* while walking a parallel index;
        # duplicate entries would make this drop the wrong row — verify.
        new.remove(new[y])
        vape.remove(vape[y])
    else:
        detres.append(i.text)
        y=y+1
# (reviewer) this should be a function
for i in vape:
    a = requests.get("http://www.basketball-reference.com"+i)
    sopa = BeautifulSoup(a.content)
    hellothere = sopa.find_all("td", {"data-stat":"fg3a_per_g"})  # 3PA per game cells
    angel = sopa.find_all("td", {"data-stat":"g"})                # games-played cells
    # (reviewer) I don't even understand what is happening here
    # (iterating over the last matching <td> yields its child text nodes,
    # which get collected into `intentos` for this player)
    for b in hellothere[len(hellothere)-1]:
        intentos.append(b)
    for c in angel[len(angel)-1]:
        intentos.insert(z,c)
    attempts.append(intentos)
    intentos=[]
# Keep only players whose (games * attempts per game) product exceeds 40
for i in attempts:
    u=1
    for x in i:
        u= float(x)*u
    if u > 40:
        attemptstotal.append(int(round(u)))
        d=d+1
    else:
        new.remove(new[d])
        vape.remove(vape[d])
        detres.remove(detres[d])
qcyo = []       # college 3P% <td> result sets, one list per player
noc= []         # college FT% <td> result sets, one list per player
college3 = []   # college 3P% text for the surviving players
collegeft = []  # college FT% text for the surviving players
l=0
for i in new:
    # College pages live at sports-reference under "first-last-1.html"
    r=requests.get("http://www.sports-reference.com/cbb/players/" +i[0].lower() + "-" + i[1].lower() + "-1.html")
    soup = BeautifulSoup(r.content)
    qcyo.append(soup.find_all("td", {"data-stat":"fg3_pct"}))
    noc.append(soup.find_all("td", {"data-stat":"ft_pct"}))
# Drop players whose college page had no 3P% cells at all
for b in qcyo:
    l=l+1
    if b==[]:
        new.remove(new[l-1])
        vape.remove(vape[l-1])
        detres.remove(detres[l-1])
        l=l-1
    else:
        college3.append(b[len(b)-1].text)
for c in noc:
    if c!=[]:
        collegeft.append(c[len(c)-1].text)
# (reviewer) This should be a function
# Build the spreadsheet: header row, then one column at a time
wb=xlwt.Workbook()
ws = wb.add_sheet("2015")
ws.write(0,0,"Name")
ws.write(0,1,"NBA 3P%")
ws.write(0,2,"College 3P%")
ws.write(0,3,"College FT%")
yu=1
for i in college3:
    ws.write(yu,2,i)
    yu=yu+1
yu=1
for i in collegeft:
    ws.write(yu,3,i)
    yu=yu+1
yu=1
for i in detres:
    ws.write(yu,1,i)
    yu=yu+1
yu=1
# Rebuild "First Last " from the reversed name parts and write column 0
for i in new:
    a=" "
    for b in reversed(i):
        a=b + " " +a
    ws.write(yu,0,a)
    yu=yu+1
wb.save("C:/Users/Los/Documents/Danny/Python/2015.xls")
# (reviewer) Why do you have all this space below????
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
from bs4 import BeautifulSoup
import xlwt

# Duplicate flat-script version of the scraper (identical logic to the
# commented copy above, without the review notes): scrape the 2015 NBA
# draft page, filter players by 3-point stats, write the results to .xls.
r = requests.get("http://www.basketball-reference.com/draft/NBA_2015.html")
soup = BeautifulSoup(r.content)
x=0   # counter for the while loop over `aa`
y=0   # running index during the 3P% filter
z=0   # insert position used while collecting one player's stats
b=1
u=1   # running product (attempts per game * games)
d=0   # running index during the total-attempts filter
aa = soup.find_all("td", {"data-stat":"player"})  # every player <td> cell
new = []            # [first, last] name word lists
links = []          # <a> tag result sets per player cell
slowpoke = []       # the same tags flattened into one list
vape = []           # href strings (relative player-page links)
detres = []         # NBA 3P% text for surviving players
intentos = []       # temp: one player's [games, attempts-per-game] strings
attempts = []       # `intentos` pairs for every fetched player
attemptstotal = []  # rounded total attempts for surviving players
for i in aa:
    new.append(((i.text).split()))
while x <= 59:
    links.append(aa[x].find_all("a"))
    x = x+1
for i in links:
    for x in i:
        slowpoke.append(x)
for x in slowpoke:
    vape.append((x.get('href')))
# Drop players with an empty or effectively-zero 3P% cell
for i in soup.find_all("td", {"data-stat":"fg3_pct"}):
    if i.text=="" or float(i.text) < 0.001 :
        # NOTE(review): removes by value while walking a parallel index;
        # duplicate entries would drop the wrong row — verify.
        new.remove(new[y])
        vape.remove(vape[y])
    else:
        detres.append(i.text)
        y=y+1
# Fetch each surviving player's own page and collect 3PA/game and games
for i in vape:
    a = requests.get("http://www.basketball-reference.com"+i)
    sopa = BeautifulSoup(a.content)
    hellothere = sopa.find_all("td", {"data-stat":"fg3a_per_g"})  # 3PA per game cells
    angel = sopa.find_all("td", {"data-stat":"g"})                # games-played cells
    # Iterating the last matching <td> yields its child text nodes
    for b in hellothere[len(hellothere)-1]:
        intentos.append(b)
    for c in angel[len(angel)-1]:
        intentos.insert(z,c)
    attempts.append(intentos)
    intentos=[]
# Keep only players whose (games * attempts per game) product exceeds 40
for i in attempts:
    u=1
    for x in i:
        u= float(x)*u
    if u > 40:
        attemptstotal.append(int(round(u)))
        d=d+1
    else:
        new.remove(new[d])
        vape.remove(vape[d])
        detres.remove(detres[d])
qcyo = []       # college 3P% <td> result sets, one list per player
noc= []         # college FT% <td> result sets, one list per player
college3 = []   # college 3P% text for surviving players
collegeft = []  # college FT% text for surviving players
l=0
# College pages live at sports-reference under "first-last-1.html"
for i in new:
    r=requests.get("http://www.sports-reference.com/cbb/players/" +i[0].lower() + "-" + i[1].lower() + "-1.html")
    soup = BeautifulSoup(r.content)
    qcyo.append(soup.find_all("td", {"data-stat":"fg3_pct"}))
    noc.append(soup.find_all("td", {"data-stat":"ft_pct"}))
# Drop players whose college page had no 3P% cells at all
for b in qcyo:
    l=l+1
    if b==[]:
        new.remove(new[l-1])
        vape.remove(vape[l-1])
        detres.remove(detres[l-1])
        l=l-1
    else:
        college3.append(b[len(b)-1].text)
for c in noc:
    if c!=[]:
        collegeft.append(c[len(c)-1].text)
# Build the spreadsheet: header row, then one column at a time
wb=xlwt.Workbook()
ws = wb.add_sheet("2015")
ws.write(0,0,"Name")
ws.write(0,1,"NBA 3P%")
ws.write(0,2,"College 3P%")
ws.write(0,3,"College FT%")
yu=1
for i in college3:
    ws.write(yu,2,i)
    yu=yu+1
yu=1
for i in collegeft:
    ws.write(yu,3,i)
    yu=yu+1
yu=1
for i in detres:
    ws.write(yu,1,i)
    yu=yu+1
yu=1
# Rebuild "First Last " from the reversed name parts and write column 0
for i in new:
    a=" "
    for b in reversed(i):
        a=b + " " +a
    ws.write(yu,0,a)
    yu=yu+1
wb.save("C:/Users/Los/Documents/Danny/Python/2015.xls")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment