Created
April 16, 2016 18:21
-
-
Save ejmurray/dd24a615f2eee5da6603832c85fc3577 to your computer and use it in GitHub Desktop.
Bbcsport.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
#!/usr/bin/env python
# encoding: utf-8
# author: Ernest
# created: 03/02/2016
# http://goo.gl/pXfFe1
"""
Description
Print the match results and the league table for a football league, scraped
from the BBC Sport website with BeautifulSoup 4 (bs4).

Change the league slug in the URLs to choose a different league, e.g.:
    premier-league
    championship
    spanish-la-liga
"""
import csv

import requests
from bs4 import BeautifulSoup
# Results page to scrape; swap the league slug (e.g. "championship") in the
# path to target a different league.
url = 'http://www.bbc.com/sport/football/premier-league/results'

# A timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()

# BeautifulSoup needs the markup itself; handing it the Response object
# (as the script previously did) fails before any parsing happens.
soup = BeautifulSoup(page.text, "html.parser")
def has_class_but_no_id(tag):
    """Report whether *tag* has an attribute named ``score``.

    Args:
        tag: An object exposing ``has_attr(name)``, such as a bs4 ``Tag``.

    Returns:
        The result of ``tag.has_attr('score')``.

    NOTE(review): the function name suggests a "class but no id" filter, yet
    the body only checks for a ``score`` attribute, and nothing visible in
    this script calls it -- confirm the intent before relying on it.
    """
    return tag.has_attr('score')
def _span_text(cell, css_class):
    """Return the joined, whitespace-stripped text of the first matching span.

    Looks up the first ``<span>`` with class *css_class* inside *cell* and
    returns its text fragments concatenated; returns ``None`` when the span
    is absent so callers can skip incomplete rows.
    """
    span = cell.find('span', class_=css_class)
    if span is None:
        return None
    return ''.join(span.stripped_strings)


# Echo every result to stdout and save it to webScraper.csv. The context
# manager guarantees the file is flushed and closed; previously the handle was
# leaked and the writer was never used, so the CSV was always left empty.
# newline="" is what the csv module requires to avoid blank lines on Windows.
with open("webScraper.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    for match in soup.find_all('td', class_='match-details'):
        home = _span_text(match, 'team-home')
        score = _span_text(match, 'score')
        away = _span_text(match, 'team-away')
        # Only report rows where all three pieces were found and are non-empty.
        if home and score and away:
            print(home, score, away)
            writer.writerow([home, score, away])
# League-table page; swap the league slug to target a different league.
url2 = "http://www.bbc.com/sport/football/spanish-la-liga/table"

# The script never imported ``urlopen`` (this line raised NameError), so fetch
# the page with ``requests``, which the file already imports.
table_page = requests.get(url2, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
table_page.raise_for_status()
soup = BeautifulSoup(table_page.text, "html.parser")

# Calling a soup/tag object is bs4 shorthand for ``find_all``: take the first
# table with class "table-stats" and walk the rows of its <tbody>.
for row in soup("table", {"class": "table-stats"})[0].tbody("tr"):
    tds = row("td")
    # Skip rows that lack the expected cells rather than dying with an
    # IndexError part-way through the table.
    if len(tds) < 3 or len(tds[1].contents) < 3:
        continue
    # Prints the third child node of the second cell followed by the third
    # cell's text. NOTE(review): presumably team name and a stat column --
    # confirm against the live page markup.
    print(tds[1].contents[2].string, tds[2].string)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment