Skip to content

Instantly share code, notes, and snippets.

@therve
Created January 21, 2015 16:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save therve/9873ab744f6e577a287f to your computer and use it in GitHub Desktop.
Save therve/9873ab744f6e577a287f to your computer and use it in GitHub Desktop.
from __future__ import division
import requests
import BeautifulSoup
# Team codes as they appear in the /teams index links (keys) mapped to the
# codes used to build the 2015 team page URL (values).  get_player_seniority
# accepts either form when matching the team column of a player's rows.
team_mapping = dict(
    NJN="BRK",
    CHA="CHO",
    NOH="NOP",
)
def get_page_body(uri, timeout=30):
    """Fetch a basketball-reference.com page and return it parsed.

    Args:
        uri: Site-relative path; it is appended directly to the host name,
            so it must start with "/" (e.g. "/teams").
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``BeautifulSoup.BeautifulSoup`` tree built from the UTF-8 encoded
        response body.

    Raises:
        requests.exceptions.RequestException: on connection failure,
            timeout, or an HTTP error status.
    """
    # Without an explicit timeout requests will wait indefinitely on a
    # stalled connection.
    response = requests.get(
        "http://www.basketball-reference.com%s" % uri, timeout=timeout)
    # Fail here on 4xx/5xx instead of parsing an error page and crashing
    # later when an expected table is missing.
    response.raise_for_status()
    content = response.text.encode("utf-8")
    return BeautifulSoup.BeautifulSoup(content)
def get_seniority(team):
    """Print and return a team's winning percentage and seniority ratio.

    For every player row in the "totals" table of the team's 2015 page with
    a non-zero minutes value, the ratio of those minutes to the total
    returned by get_player_seniority() is weighted by the player's share of
    the summed team-page minutes; the weighted ratios are added up.

    Args:
        team: Team code used in the "/teams/<code>/2015.html" URL.

    Returns:
        A ``(team, wins, coef)`` tuple.  The same three values are also
        printed, space-separated, on one line.
    """
    team_page = get_page_body("/teams/%s/2015.html" % team)
    wins = get_winning_percentage(team_page)
    totals_table = team_page.find(id="totals")
    data = []
    for row in totals_table.findAll("tr"):
        cols = row.findAll("td")
        if not cols:
            continue
        links = cols[1].findAll("a")
        if not links:
            # Same guard list_teams applies: a row without a player link
            # cannot be followed, so skip it instead of raising IndexError.
            continue
        mins = int(cols[5].text)
        if not mins:
            continue
        total_mins = get_player_seniority(links[0]["href"], team)
        if not total_mins:
            # No matching row was found on the player page.  The player has
            # played `mins` for this team, so use that as the total rather
            # than dividing by zero below.
            total_mins = mins
        data.append((mins, total_mins))
    total = sum(mins for (mins, _) in data)
    # True division is provided by the file's `from __future__ import
    # division`.
    coef = sum(mins / total_mins * mins / total for (mins, total_mins) in data)
    # A single formatted string prints the same line under Python 2 and 3.
    print("%s %s %s" % (team, wins, coef))
    return team, wins, coef
def get_player_seniority(link, team):
    """Sum a player's minutes over every row played for the given team.

    Args:
        link: Site-relative URL of the player's page.
        team: Team code to match.  Keys of team_mapping whose value equals
            this code are matched as well.

    Returns:
        The integer sum of column index 7 (minutes, per the variable
        naming) over the rows of the player's "totals" table whose last CSS
        class is "full_table" or "partial_table" and whose column index 2
        is one of the accepted team codes.  0 when no row matches.
    """
    teams = set([team])
    teams.update(key for (key, value) in team_mapping.items() if value == team)
    total_mins = 0
    totals_table = get_page_body(link).find(id="totals")
    for row in totals_table.findAll("tr"):
        # row["class"] raises KeyError for a <tr> with no class attribute;
        # .get() returns None so such rows are skipped by the check below.
        css_class = row.get("class")
        if not css_class:
            continue
        if css_class.split()[-1] not in ("full_table", "partial_table"):
            continue
        cols = row.findAll("td")
        if cols[2].text in teams:
            total_mins += int(cols[7].text)
    return total_mins
def get_winning_percentage(team_page):
    """Return the fraction of games won, read from a team page's info box.

    Args:
        team_page: Parsed team page.  It must contain an element with id
            "info_box" holding a "Record:" label whose grandparent's second
            child is text starting with "<wins>-<losses>".

    Returns:
        wins / (wins + losses) as a float, or 0.0 when the record shows no
        games played.
    """
    info_box = team_page.find(id="info_box")
    record_box = info_box.find(text="Record:").parent.parent
    # contents[1] is the text following the label; the win-loss record is
    # the part before the first comma.
    record = record_box.contents[1].split(",")[0].strip()
    parts = record.split("-")
    wins, losses = int(parts[0]), int(parts[1])
    games = wins + losses
    if not games:
        # A 0-0 record would otherwise raise ZeroDivisionError.
        return 0.0
    # True division is provided by the file's `from __future__ import
    # division`.
    return wins / games
def list_teams():
    """Collect the team codes linked from the "active" table on /teams.

    Returns:
        A list with one code per table row whose first cell contains a
        link.  The code is the element at index 2 of that link's href split
        on "/".
    """
    active_table = get_page_body("/teams").find(id="active")
    codes = []
    for row in active_table.findAll("tr"):
        cells = row.findAll("td")
        # Rows without cells, or whose first cell has no link, are skipped.
        anchors = cells[0].findAll("a") if cells else []
        if anchors:
            codes.append(anchors[0]["href"].split("/")[2])
    return codes
def main():
    """Run get_seniority() for every team code returned by list_teams().

    Codes that are keys of team_mapping are replaced by their mapped value
    before the call.
    """
    for code in list_teams():
        get_seniority(team_mapping.get(code, code))
# Run the report only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment