thejsj/1-fixed.py

## 1-fixed.py
import requests
import xlwt
import datetime
from bs4 import BeautifulSoup

# Turn lines of code used multiple times into functions
def fetch_soup(uri):
    """Download ``uri`` and return its body parsed by BeautifulSoup.

    Args:
        uri: Address of the page, passed straight to ``requests.get``.

    Returns:
        The ``BeautifulSoup`` object built from the raw response content.
    """
    page_bytes = requests.get(uri).content
    # Name the parser explicitly ("html.parser") so BeautifulSoup does not
    # complain about having to pick one itself.
    soup = BeautifulSoup(page_bytes, "html.parser")
    return soup

# Get all players from the soup and form a list of dictionaries
def get_players(players_soup):
    """Build one dictionary per player cell found in the page.

    Every ``<td data-stat="player">`` cell produces exactly one dictionary,
    in page order, so a position in the returned list always matches the
    position of the cell it came from.

    Args:
        players_soup: Parsed page exposing ``find_all``.

    Returns:
        A list of dictionaries with the keys ``index`` (position of the
        cell), ``first_name`` (first word of the cell text), ``last_name``
        (second word of the cell text) and ``link`` (``href`` of the first
        ``<a>`` tag in the cell).  Missing name words become ``''`` and a
        missing ``<a>`` tag gives ``link = None``.
    """
    player_cells = players_soup.find_all("td", {"data-stat":"player"})
    all_players = []
    for i, cell in enumerate(player_cells):
        name_parts = cell.text.split()
        anchor = cell.find('a')
        # Keep every value of one player together in a single dictionary
        # instead of spreading them over several index-coupled lists.
        all_players.append({
            'index': i,
            # A cell can hold fewer than two words; fall back to '' rather
            # than raising IndexError.
            'first_name': name_parts[0] if name_parts else '',
            'last_name': name_parts[1] if len(name_parts) > 1 else '',
            # A cell without an <a> tag gets no link instead of raising
            # AttributeError; the entry is still kept so positions stay
            # aligned with the cells.
            'link': anchor.get('href') if anchor is not None else None,
        })
    return all_players

def append_3point_fg_pct(soup, players):
    """Attach a three-point percentage to the matching player dictionaries.

    The ``fg3_pct`` cells of ``soup`` are matched to ``players`` by
    position: the n-th cell belongs to ``players[n]``.  A cell that is
    empty, or whose value is below 0.001, leaves its player untouched.

    Args:
        soup: Parsed page exposing ``find_all``.
        players: List of player dictionaries; it is modified in place.

    Returns:
        The same ``players`` list that was passed in.
    """
    pct_cells = soup.find_all("td", {"data-stat":"fg3_pct"})
    # `position` is the numeric place of the cell (0, 1, 2, ...) and `cell`
    # is the cell itself.
    for position, cell in enumerate(pct_cells):
        raw_text = cell.text
        if raw_text == "":
            # Nothing recorded for this player.
            continue
        pct = float(raw_text)
        if pct >= 0.001:
            # Add the new property to this player's dictionary.
            players[position]['3point_pct'] = pct
    return players

# For every player, fetch their NBA stats
def append_nba_attempts(players):
    """Fetch each player's stats page and record three-point attempts.

    For every player the page at ``player['link']`` is downloaded and the
    text of the last ``fg3a_per_g`` cell and of the last ``g`` cell is
    converted to a float.  On success the keys ``attempts_per_game``,
    ``games`` and ``attemptstotal`` (their product) are set.

    A player is left without these keys when the link is missing, when the
    page has no such cells, or when the cell text is not a number.  This
    mirrors how the college lookup only adds a key when a value was found.

    Args:
        players: List of player dictionaries; it is modified in place.

    Returns:
        The same ``players`` list that was passed in.
    """
    for player in players:
        link = player.get('link')
        if not link:
            continue
        stats_soup = fetch_soup("http://www.basketball-reference.com" + link)
        attempt_cells = stats_soup.find_all("td", {"data-stat":"fg3a_per_g"})
        game_cells = stats_soup.find_all("td", {"data-stat":"g"})
        if not attempt_cells or not game_cells:
            # No stat cells on the page: indexing [-1] would raise IndexError.
            continue
        try:
            # [-1] is the last cell found for each statistic.
            attempts_per_game = float(attempt_cells[-1].text)
            games = float(game_cells[-1].text)
        except ValueError:
            # Empty or non-numeric cell text.
            continue
        player['attempts_per_game'] = attempts_per_game
        player['games'] = games
        player['attemptstotal'] = attempts_per_game * games
    return players

def append_college_attempts(players):
    """Look up each player's college page and record shooting percentages.

    The page address is built from the lower-cased first and last name
    joined by a dash.  The text of the last ``fg3_pct`` cell is stored
    under ``college_3point_pct`` and the text of the last ``ft_pct`` cell
    under ``college_ft_pct``; a key is only added when at least one such
    cell exists.

    Args:
        players: List of player dictionaries; it is modified in place.

    Returns:
        The same ``players`` list that was passed in.
    """
    base_url = "http://www.sports-reference.com/cbb/players/"
    for player in players:
        # Join first and last name to form the page name.
        slug = "-".join([player['first_name'].lower(), player['last_name'].lower()])
        college_page = fetch_soup(base_url + slug + "-1.html")
        fg3_cells = college_page.find_all("td", {"data-stat":"fg3_pct"})
        ft_cells = college_page.find_all("td", {"data-stat":"ft_pct"})
        # Only add a property when the page had at least one matching cell.
        if fg3_cells:
            player['college_3point_pct'] = fg3_cells[-1].text
        if ft_cells:
            player['college_ft_pct'] = ft_cells[-1].text
    return players

def add_stats_to_sheet(workbook, stats, year):
    """Write a matrix of values into a new sheet of ``workbook``.

    Args:
        workbook: Object providing ``add_sheet(name)``; the sheet it
            returns must provide ``write(row, column, value)``.
        stats: Rows of values (lists within a list); row ``r``, item ``c``
            is written to cell ``(r, c)``.
        year: Name given to the new sheet.
    """
    sheet = workbook.add_sheet(year)
    # Lazily flatten the matrix into (row, column, value) triples so the
    # writes happen in the same row-by-row order as a nested loop.
    cells = (
        (row_number, column_number, value)
        for row_number, row in enumerate(stats)
        for column_number, value in enumerate(row)
    )
    for row_number, column_number, value in cells:
        sheet.write(row_number, column_number, value)

def save_workbook(workbook):
    """Save ``workbook`` under a file name stamped with the current time.

    The file is written to ``./2015- <seconds>.xls`` where ``<seconds>`` is
    the current local time expressed as whole seconds since the epoch.

    Args:
        workbook: Object providing ``save(path)``.
    """
    import time  # only needed here, so it is imported locally

    now = datetime.datetime.now()
    # strftime("%s") is a platform-specific extension and is not available
    # everywhere, so derive the epoch seconds from the local time tuple.
    epoch_seconds = int(time.mktime(now.timetuple()))
    workbook.save("./2015- " + str(epoch_seconds) + ".xls")

# THIS IS OUR MAIN FUNCTION!
def main():
    """Scrape the 2015 draft class and write the shooting stats to a sheet.

    Steps:
      1. Fetch the draft page and build one dictionary per player.
      2. Keep only players that have an NBA three-point percentage.
      3. Fetch NBA attempts and keep only players with more than 40 total
         attempts.
      4. Fetch college percentages and keep only players that have a
         college three-point percentage.
      5. Write name, NBA 3P%, college 3P% and college FT% to a workbook
         and save it.

    Progress messages are printed with ``print(...)`` and a single string
    argument, which produces the same output on Python 2 and Python 3.
    """
    print("Fetching players...")
    players_soup = fetch_soup("http://www.basketball-reference.com/draft/NBA_2015.html")
    # Create the list of player dictionaries
    all_players = get_players(players_soup)

    # Append a property to the player dictionaries
    all_players = append_3point_fg_pct(players_soup, all_players)
    # Remove all players without a 3 point percentage
    all_players = [p for p in all_players if '3point_pct' in p]

    print("Fetching NBA attempts per player...")
    all_players = append_nba_attempts(all_players)
    # Remove all players without enough attempts.  The value stored under
    # the key has to be compared; comparing the key string itself with a
    # number never filters anything.  A missing value counts as 0 attempts.
    all_players = [p for p in all_players if p.get('attemptstotal', 0.0) > 40.0]

    print("Fetching college attempts per player...")
    all_players = append_college_attempts(all_players)
    # Remove all players without a college 3 point percentage
    all_players = [p for p in all_players if 'college_3point_pct' in p]

    print("Saving to excel...")
    # Transform the dictionaries into rows to pass to the spreadsheet
    rows = [['Name', 'NBA 3P%', 'College 3P%', 'College FT%']]
    for p in all_players:
        rows.append([
            p['first_name'] + " " + p['last_name'],
            str(p['3point_pct']),
            p['college_3point_pct'],
            # The free-throw percentage is only set when the college page
            # had one; write an empty cell instead of raising KeyError.
            p.get('college_ft_pct', ''),
        ])

    workbook = xlwt.Workbook()
    add_stats_to_sheet(workbook, rows, '2015')
    save_workbook(workbook)
    print("Done")

# Run `main` only when this file is executed as a script, not when it is
# imported by another Python file.
if __name__ == '__main__':
  main()

## 2-con-comentarios.py
import requests
from bs4 import BeautifulSoup
import xlwt

# Download and parse the 2015 draft page.
r = requests.get("http://www.basketball-reference.com/draft/NBA_2015.html")

soup = BeautifulSoup(r.content)

# Counters and indices used further down.
# Review note: I don't know what these variables are (the names do not
# explain themselves).
#   x - index for the `while` loop that collects the links
#   y - position in `new`/`vape` while filtering on the 3-point percentage
#   z - insertion position passed to `intentos.insert` (it stays 0)
#   b - set to 1 here, later only reused as a loop variable
#   u - running product used to compute the total attempts
#   d - position in `new`/`vape`/`detres` while filtering on total attempts
x=0
y=0
z=0
b=1
u=1
d=0

# Every <td data-stat="player"> cell of the draft page.
# Review note: `aa`? What is this?
aa = soup.find_all("td", {"data-stat":"player"})

# Working lists.
# Review note: I don't know what all these variables do.
#   new           - player names, each split into a list of words
#   links         - for each of the first 60 cells, its list of <a> tags
#   slowpoke      - all <a> tags from `links`, flattened into one list
#   vape          - the href of every tag in `slowpoke`
#   detres        - text of the fg3_pct cells that passed the filter
#   intentos      - scratch list of the values read from one stats page
#   attempts      - one `intentos` list per entry of `vape`
#   attemptstotal - rounded products that were above 40
new = []
links = []
slowpoke = []
vape = []
detres = []
intentos = []
attempts = []
attemptstotal = []

# Review note: a map could be used here instead of this loop (cleaner).
for i in aa:
    new.append(((i.text).split()))

# Only the first 60 cells are visited (indices 0 to 59).
# Review note: `for x in range(60)` could be used here.
while x <= 59:
    links.append(aa[x].find_all("a"))
    x = x+1

# Flatten the per-cell lists of <a> tags into a single list.
for i in links:
    for x in i:
        slowpoke.append(x)

# Review note: this could have been done above instead of here.
for x in slowpoke:
    vape.append((x.get('href')))

# Drop players whose fg3_pct cell is empty or below 0.001; `y` only advances
# when an entry is kept, so it keeps pointing at the current player.
# NOTE(review): list.remove deletes the first element EQUAL to new[y], which
# is the one at index y only if no earlier element compares equal.
for i in soup.find_all("td", {"data-stat":"fg3_pct"}):
    # Review note: this is what is called a filter (cleaner).
    if i.text=="" or float(i.text) < 0.001 :
        new.remove(new[y])
        vape.remove(vape[y])
    else:
        detres.append(i.text)
        y=y+1

# For every remaining link, download the player's page and collect the
# values of the last fg3a_per_g cell and the last g cell.
# Review note: this should be a function.
for i in vape:
     a = requests.get("http://www.basketball-reference.com"+i)
     sopa = BeautifulSoup(a.content)
     hellothere = sopa.find_all("td", {"data-stat":"fg3a_per_g"})
     angel = sopa.find_all("td", {"data-stat":"g"})
     # Review note: I can't even tell what is happening here.
     # Iterating over the last cell presumably yields its text content
     # (TODO confirm); the `g` values are inserted at position z (0), i.e.
     # in front of the fg3a_per_g values.
     for b in hellothere[len(hellothere)-1]:
         intentos.append(b)
     for c in angel[len(angel)-1]:
         intentos.insert(z,c)
     attempts.append(intentos)
     # Start a fresh scratch list for the next player.
     intentos=[]

# Multiply the collected values of each player; keep the player when the
# product is above 40, otherwise drop the entry at position `d` from the
# parallel lists (`d` only advances when an entry is kept).
for i in attempts:
    u=1
    for x in i:
        u= float(x)*u
    if u > 40:
        attemptstotal.append(int(round(u)))
        d=d+1
    else:
        new.remove(new[d])
        vape.remove(vape[d])
        detres.remove(detres[d])

# Lists for the college lookup:
#   qcyo      - per player, the fg3_pct cells of the college page
#   noc       - per player, the ft_pct cells of the college page
#   college3  - text of the last fg3_pct cell of each kept player
#   collegeft - text of the last ft_pct cell, when there is one
#   l         - position in `new`/`vape`/`detres` while filtering
qcyo = []
noc= []
college3 = []
collegeft = []
l=0

# The college page name is built from the lower-cased first two words of
# the player's name.  Note that `r` and `soup` are rebound here.
for i in new:
    r=requests.get("http://www.sports-reference.com/cbb/players/" +i[0].lower() + "-" + i[1].lower() + "-1.html")
    soup = BeautifulSoup(r.content)
    qcyo.append(soup.find_all("td", {"data-stat":"fg3_pct"}))
    noc.append(soup.find_all("td", {"data-stat":"ft_pct"}))

# Drop players without any college fg3_pct cell; `l` is stepped back after a
# removal so it keeps pointing at the right position.
for b in qcyo:
    l=l+1
    if b==[]:
        new.remove(new[l-1])
        vape.remove(vape[l-1])
        detres.remove(detres[l-1])
        l=l-1
    else:
        college3.append(b[len(b)-1].text)

# NOTE(review): empty results are skipped here without removing anything
# from the other lists, so `collegeft` is not guaranteed to have the same
# length as `college3`.
for c in noc:
    if c!=[]:
       collegeft.append(c[len(c)-1].text)

# Write the collected columns to a spreadsheet.
# Review note: this should be a function.
wb=xlwt.Workbook()
ws = wb.add_sheet("2015")

# Header row.
ws.write(0,0,"Name")
ws.write(0,1,"NBA 3P%")
ws.write(0,2,"College 3P%")
ws.write(0,3,"College FT%")

# `yu` is the current row; it restarts at 1 (below the header) per column.
yu=1

# Column 2: college 3-point percentage.
for i in college3:
    ws.write(yu,2,i)
    yu=yu+1

yu=1

# Column 3: college free-throw percentage.
for i in collegeft:
    ws.write(yu,3,i)
    yu=yu+1

yu=1

# Column 1: NBA 3-point percentage.
for i in detres:
    ws.write(yu,1,i)
    yu=yu+1

yu=1

# Column 0: the name.  The words are prepended in reverse order, so the
# result keeps the original word order and ends with extra spaces.
for i in new:
	a=" "
	for b in reversed(i):
		a=b + " " +a
	ws.write(yu,0,a)
	yu=yu+1

# The output location is a fixed absolute path.
wb.save("C:/Users/Los/Documents/Danny/Python/2015.xls")
# Review note: why is there all this empty space below?


## 3-original.py
import requests
from bs4 import BeautifulSoup
import xlwt

# Download and parse the 2015 draft page.
r = requests.get("http://www.basketball-reference.com/draft/NBA_2015.html")

soup = BeautifulSoup(r.content)

# Counters and indices used further down:
#   x - index for the `while` loop that collects the links
#   y - position in `new`/`vape` while filtering on the 3-point percentage
#   z - insertion position passed to `intentos.insert` (it stays 0)
#   b - set to 1 here, later only reused as a loop variable
#   u - running product used to compute the total attempts
#   d - position in `new`/`vape`/`detres` while filtering on total attempts
x=0
y=0
z=0
b=1
u=1
d=0

# Every <td data-stat="player"> cell of the draft page.
aa = soup.find_all("td", {"data-stat":"player"})

# Working lists:
#   new           - player names, each split into a list of words
#   links         - for each of the first 60 cells, its list of <a> tags
#   slowpoke      - all <a> tags from `links`, flattened into one list
#   vape          - the href of every tag in `slowpoke`
#   detres        - text of the fg3_pct cells that passed the filter
#   intentos      - scratch list of the values read from one stats page
#   attempts      - one `intentos` list per entry of `vape`
#   attemptstotal - rounded products that were above 40
new = []
links = []
slowpoke = []
vape = []
detres = []
intentos = []
attempts = []
attemptstotal = []

# Split every player cell's text into words.
for i in aa:
    new.append(((i.text).split()))

# Only the first 60 cells are visited (indices 0 to 59).
while x <= 59:
    links.append(aa[x].find_all("a"))
    x = x+1

# Flatten the per-cell lists of <a> tags into a single list.
for i in links:
    for x in i:
        slowpoke.append(x)

# Keep only the href of each tag.
for x in slowpoke:
    vape.append((x.get('href')))

# Drop players whose fg3_pct cell is empty or below 0.001; `y` only advances
# when an entry is kept, so it keeps pointing at the current player.
# NOTE(review): list.remove deletes the first element EQUAL to new[y], which
# is the one at index y only if no earlier element compares equal.
for i in soup.find_all("td", {"data-stat":"fg3_pct"}):
    if i.text=="" or float(i.text) < 0.001 :
        new.remove(new[y])
        vape.remove(vape[y])
    else:
        detres.append(i.text)
        y=y+1

# For every remaining link, download the player's page and collect the
# values of the last fg3a_per_g cell and the last g cell.
for i in vape:
     a = requests.get("http://www.basketball-reference.com"+i)
     sopa = BeautifulSoup(a.content)
     hellothere = sopa.find_all("td", {"data-stat":"fg3a_per_g"})
     angel = sopa.find_all("td", {"data-stat":"g"})
     # Iterating over the last cell presumably yields its text content
     # (TODO confirm); the `g` values are inserted at position z (0), i.e.
     # in front of the fg3a_per_g values.
     for b in hellothere[len(hellothere)-1]:
         intentos.append(b)
     for c in angel[len(angel)-1]:
         intentos.insert(z,c)
     attempts.append(intentos)
     # Start a fresh scratch list for the next player.
     intentos=[]

# Multiply the collected values of each player; keep the player when the
# product is above 40, otherwise drop the entry at position `d` from the
# parallel lists (`d` only advances when an entry is kept).
for i in attempts:
    u=1
    for x in i:
        u= float(x)*u
    if u > 40:
        attemptstotal.append(int(round(u)))
        d=d+1
    else:
        new.remove(new[d])
        vape.remove(vape[d])
        detres.remove(detres[d])

# Lists for the college lookup:
#   qcyo      - per player, the fg3_pct cells of the college page
#   noc       - per player, the ft_pct cells of the college page
#   college3  - text of the last fg3_pct cell of each kept player
#   collegeft - text of the last ft_pct cell, when there is one
#   l         - position in `new`/`vape`/`detres` while filtering
qcyo = []
noc= []
college3 = []
collegeft = []
l=0

# The college page name is built from the lower-cased first two words of
# the player's name.  Note that `r` and `soup` are rebound here.
for i in new:
    r=requests.get("http://www.sports-reference.com/cbb/players/" +i[0].lower() + "-" + i[1].lower() + "-1.html")
    soup = BeautifulSoup(r.content)
    qcyo.append(soup.find_all("td", {"data-stat":"fg3_pct"}))
    noc.append(soup.find_all("td", {"data-stat":"ft_pct"}))

# Drop players without any college fg3_pct cell; `l` is stepped back after a
# removal so it keeps pointing at the right position.
for b in qcyo:
    l=l+1
    if b==[]:
        new.remove(new[l-1])
        vape.remove(vape[l-1])
        detres.remove(detres[l-1])
        l=l-1
    else:
        college3.append(b[len(b)-1].text)

# NOTE(review): empty results are skipped here without removing anything
# from the other lists, so `collegeft` is not guaranteed to have the same
# length as `college3`.
for c in noc:
    if c!=[]:
       collegeft.append(c[len(c)-1].text)

# Write the collected columns to a spreadsheet.
wb=xlwt.Workbook()
ws = wb.add_sheet("2015")

# Header row.
ws.write(0,0,"Name")
ws.write(0,1,"NBA 3P%")
ws.write(0,2,"College 3P%")
ws.write(0,3,"College FT%")

# `yu` is the current row; it restarts at 1 (below the header) per column.
yu=1

# Column 2: college 3-point percentage.
for i in college3:
    ws.write(yu,2,i)
    yu=yu+1

yu=1

# Column 3: college free-throw percentage.
for i in collegeft:
    ws.write(yu,3,i)
    yu=yu+1

yu=1

# Column 1: NBA 3-point percentage.
for i in detres:
    ws.write(yu,1,i)
    yu=yu+1

yu=1

# Column 0: the name.  The words are prepended in reverse order, so the
# result keeps the original word order and ends with extra spaces.
for i in new:
	a=" "
	for b in reversed(i):
		a=b + " " +a
	ws.write(yu,0,a)
	yu=yu+1

# The output location is a fixed absolute path.
wb.save("C:/Users/Los/Documents/Danny/Python/2015.xls")
	import requests
	import xlwt
	import datetime
	from bs4 import BeautifulSoup

	# Turn lines of code used multiple times into functions
	def fetch_soup(uri):
		"""Download ``uri`` and return its body parsed by BeautifulSoup.

		Args:
			uri: Address of the page, passed straight to ``requests.get``.

		Returns:
			The ``BeautifulSoup`` object built from the raw response content.
		"""
		response = requests.get(uri)
		# Name the parser explicitly ("html.parser") so BeautifulSoup does
		# not complain about having to pick one itself.
		return BeautifulSoup(response.content, "html.parser")

	# Get all players from the soup and form a list of dictionaries
	def get_players(players_soup):
		"""Build one dictionary per player cell found in the page.

		Every ``<td data-stat="player">`` cell produces exactly one
		dictionary, in page order, so a position in the returned list always
		matches the position of the cell it came from.

		Args:
			players_soup: Parsed page exposing ``find_all``.

		Returns:
			A list of dictionaries with the keys ``index`` (position of the
			cell), ``first_name`` (first word of the cell text),
			``last_name`` (second word of the cell text) and ``link``
			(``href`` of the first ``<a>`` tag in the cell).  Missing name
			words become ``''`` and a missing ``<a>`` tag gives
			``link = None``.
		"""
		player_cells = players_soup.find_all("td", {"data-stat":"player"})
		all_players = []
		for i, cell in enumerate(player_cells):
			name_parts = cell.text.split()
			anchor = cell.find('a')
			# Keep every value of one player together in a single dictionary
			# instead of spreading them over several index-coupled lists.
			all_players.append({
				'index': i,
				# A cell can hold fewer than two words; fall back to ''
				# rather than raising IndexError.
				'first_name': name_parts[0] if name_parts else '',
				'last_name': name_parts[1] if len(name_parts) > 1 else '',
				# A cell without an <a> tag gets no link instead of raising
				# AttributeError; the entry is still kept so positions stay
				# aligned with the cells.
				'link': anchor.get('href') if anchor is not None else None,
			})
		return all_players

	def append_3point_fg_pct(soup, players):
		"""Attach a three-point percentage to the matching player dictionaries.

		The ``fg3_pct`` cells of ``soup`` are matched to ``players`` by
		position: the n-th cell belongs to ``players[n]``.  A cell that is
		empty, or whose value is below 0.001, leaves its player untouched.

		Args:
			soup: Parsed page exposing ``find_all``.
			players: List of player dictionaries; it is modified in place.

		Returns:
			The same ``players`` list that was passed in.
		"""
		pct_cells = soup.find_all("td", {"data-stat":"fg3_pct"})
		# `position` is the numeric place of the cell (0, 1, 2, ...) and
		# `cell` is the cell itself.
		for position, cell in enumerate(pct_cells):
			raw_text = cell.text
			if raw_text == "":
				# Nothing recorded for this player.
				continue
			pct = float(raw_text)
			if pct >= 0.001:
				# Add the new property to this player's dictionary.
				players[position]['3point_pct'] = pct
		return players

	# For every player, fetch their NBA stats
	def append_nba_attempts(players):
		"""Fetch each player's stats page and record three-point attempts.

		For every player the page at ``player['link']`` is downloaded and
		the text of the last ``fg3a_per_g`` cell and of the last ``g`` cell
		is converted to a float.  On success the keys ``attempts_per_game``,
		``games`` and ``attemptstotal`` (their product) are set.

		A player is left without these keys when the link is missing, when
		the page has no such cells, or when the cell text is not a number.

		Args:
			players: List of player dictionaries; it is modified in place.

		Returns:
			The same ``players`` list that was passed in.
		"""
		for player in players:
			link = player.get('link')
			if not link:
				continue
			stats_soup = fetch_soup("http://www.basketball-reference.com" + link)
			attempt_cells = stats_soup.find_all("td", {"data-stat":"fg3a_per_g"})
			game_cells = stats_soup.find_all("td", {"data-stat":"g"})
			if not attempt_cells or not game_cells:
				# No stat cells on the page: [-1] would raise IndexError.
				continue
			try:
				# [-1] is the last cell found for each statistic.
				attempts_per_game = float(attempt_cells[-1].text)
				games = float(game_cells[-1].text)
			except ValueError:
				# Empty or non-numeric cell text.
				continue
			player['attempts_per_game'] = attempts_per_game
			player['games'] = games
			player['attemptstotal'] = attempts_per_game * games
		return players

	def append_college_attempts(players):
		"""Look up each player's college page and record shooting percentages.

		The page address is built from the lower-cased first and last name
		joined by a dash.  The text of the last ``fg3_pct`` cell is stored
		under ``college_3point_pct`` and the text of the last ``ft_pct``
		cell under ``college_ft_pct``; a key is only added when at least one
		such cell exists.

		Args:
			players: List of player dictionaries; it is modified in place.

		Returns:
			The same ``players`` list that was passed in.
		"""
		for player in players:
			# Join first and last name to form the page name.
			player_name = player['first_name'].lower() + "-" + player['last_name'].lower()
			player_soup = fetch_soup("http://www.sports-reference.com/cbb/players/" + player_name + "-1.html")
			three_pt_fg_pct = player_soup.find_all("td", {"data-stat":"fg3_pct"})
			ft_pct = player_soup.find_all("td", {"data-stat":"ft_pct"})
			# Only add a property when the page had at least one such cell.
			if len(three_pt_fg_pct) > 0:
				player['college_3point_pct'] = three_pt_fg_pct[-1].text
			if len(ft_pct) > 0:
				player['college_ft_pct'] = ft_pct[-1].text
		return players

	def add_stats_to_sheet(workbook, stats, year):
		"""Write a matrix of values into a new sheet of ``workbook``.

		Args:
			workbook: Object providing ``add_sheet(name)``; the sheet it
				returns must provide ``write(row, column, value)``.
			stats: Rows of values (lists within a list); row ``r``, item
				``c`` is written to cell ``(r, c)``.
			year: Name given to the new sheet.
		"""
		sheet = workbook.add_sheet(year)
		for row_number, row in enumerate(stats):
			for column_number, value in enumerate(row):
				sheet.write(row_number, column_number, value)

	def save_workbook(workbook):
		"""Save ``workbook`` under a file name stamped with the current time.

		The file is written to ``./2015- <seconds>.xls`` where ``<seconds>``
		is the current local time expressed as whole seconds since the epoch.

		Args:
			workbook: Object providing ``save(path)``.
		"""
		import time  # only needed here, so it is imported locally

		now = datetime.datetime.now()
		# strftime("%s") is a platform-specific extension and is not
		# available everywhere, so derive the epoch seconds from the local
		# time tuple.
		epoch_seconds = int(time.mktime(now.timetuple()))
		workbook.save("./2015- " + str(epoch_seconds) + ".xls")

	# THIS IS OUR MAIN FUNCTION!
	def main():
		"""Scrape the 2015 draft class and write the shooting stats to a sheet.

		Steps:
		  1. Fetch the draft page and build one dictionary per player.
		  2. Keep only players that have an NBA three-point percentage.
		  3. Fetch NBA attempts and keep only players with more than 40
		     total attempts.
		  4. Fetch college percentages and keep only players that have a
		     college three-point percentage.
		  5. Write name, NBA 3P%, college 3P% and college FT% to a workbook
		     and save it.

		Progress messages are printed with ``print(...)`` and a single
		string argument, which produces the same output on Python 2 and
		Python 3.
		"""
		print("Fetching players...")
		players_soup = fetch_soup("http://www.basketball-reference.com/draft/NBA_2015.html")
		# Create the list of player dictionaries
		all_players = get_players(players_soup)

		# Append a property to the player dictionaries
		all_players = append_3point_fg_pct(players_soup, all_players)
		# Remove all players without a 3 point percentage
		all_players = [p for p in all_players if '3point_pct' in p]

		print("Fetching NBA attempts per player...")
		all_players = append_nba_attempts(all_players)
		# Remove all players without enough attempts.  The value stored
		# under the key has to be compared; comparing the key string itself
		# with a number never filters anything.  A missing value counts as
		# 0 attempts.
		all_players = [p for p in all_players if p.get('attemptstotal', 0.0) > 40.0]

		print("Fetching college attempts per player...")
		all_players = append_college_attempts(all_players)
		# Remove all players without a college 3 point percentage
		all_players = [p for p in all_players if 'college_3point_pct' in p]

		print("Saving to excel...")
		# Transform the dictionaries into rows to pass to the spreadsheet
		rows = [['Name', 'NBA 3P%', 'College 3P%', 'College FT%']]
		for p in all_players:
			rows.append([
				p['first_name'] + " " + p['last_name'],
				str(p['3point_pct']),
				p['college_3point_pct'],
				# The free-throw percentage is only set when the college
				# page had one; write an empty cell instead of raising
				# KeyError.
				p.get('college_ft_pct', ''),
			])

		workbook = xlwt.Workbook()
		add_stats_to_sheet(workbook, rows, '2015')
		save_workbook(workbook)
		print("Done")

	# Run `main` only when this file is executed as a script, not when it is
	# imported by another Python file.
	if __name__ == '__main__':
		main()