Instantly share code, notes, and snippets.

Embed
What would you like to do?
from pprint import pprint
from bs4 import BeautifulSoup
import dryscrape
from itertools import tee, islice, chain
# Site that is scraped and the path (relative to BASEURL) of the schedule page.
# NOTE(review): "RELURRL" looks like a typo of "RELURL"; the name is kept
# because it is referenced further down in the script.
BASEURL = 'http://metalligaen.dk'
RELURRL = '/Home/Schedule'
# Wikitext fragments used to assemble the results table.
# Starts a new table row whose first cell is a wikilink to the home team (%s).
WTHOMETEAM = '|-\n| style="text-align:left; background:#F2F2F2;"|[[%s]]\n'
# One table cell: an HTML comment naming the opponent (first %s) followed by
# the cell content (second %s).
WTCELL = '| <!-- vs. %s --> %s\n'
# Cell content used where a team's row meets its own column.
WTSELFCELL = 'style="background:#F2F2F2;"|–'
# Separator placed between several results that share one cell.
WTGAMEDIVIDER = '<br />'
# Small-print suffix wrapped around the value of the schedule's OT column (%s).
WTOT = '<small>&nbsp;(%s)</small>'
# Table opening plus the header rows. The ten away-team columns are
# hard-coded here, so the row cells must be emitted in this same order.
WTHEADER = """{| class="wikitable" style="text-align:center;"
! rowspan="2"|Home team
! colspan="10"|Away team
|-
! style="width:70px; background:#EEEE00; color:#222266;"|Esbjerg
! style="width:70px; background:white; color:#00245D;"|Fr.havn
! style="width:70px; background:#0B2161; color:#FE7903;"|Gentofte
! style="width:70px; background:black; color:#FFCF00;"|Herlev
! style="width:70px; background:#1A2738; color:white;"|Herning
! style="width:70px; background:white; color:#832831;"|Odense
! style="width:70px; background:#012FA9; color:#D81F35;"|Rungsted
! style="width:70px; background:#CD2228; color:white;"|Rødovre
! style="width:70px; background:#CCEEFF; color:black;"|Sønderj.
! style="width:70px; background:#C90000; color:black;"|Aalborg
"""
# Closes the wikitable.
WTFOOTER = '|}'
# Open a dryscrape browser session rooted at BASEURL; image loading is
# switched off since only the page markup is needed.
session = dryscrape.Session(base_url = BASEURL)
session.set_attribute('auto_load_images', False)
session.visit(RELURRL)
# Block until the schedule table is present in the DOM
# (presumably it is filled in by JavaScript after the initial load).
session.wait_for(lambda: session.at_xpath("//div[@id='schedule']/table"))
response = session.body()
# Parse the rendered HTML and collect every table row inside the element
# with id "schedule".
soup = BeautifulSoup(response, 'html.parser')
gamesoup = soup.find(id='schedule').find_all('tr')
# Extract [hometeam, awayteam, score, OT] from every schedule row.
# The cells of a data row are, in order:
# date, time, hometeam, awayteam, venue, TV link, score, OT, rapportlink
games = []
for row in gamesoup:
    cells = row.find_all('td')
    if not cells:
        # Rows without any <td> cell carry no game data.
        continue
    games.append([
        cells[2].text,                      # home team
        cells[3].text,                      # away team
        cells[6].text.replace(' ', ''),     # score with spaces removed
        cells[7].text.replace('\xa0', ''),  # OT column with nbsp removed
    ])
# Unique home teams in alphabetical order. "Aa" is compared as "Å" so that
# Aalborg ends up last.
teams = sorted(
    {entry[0] for entry in games},
    key=lambda name: name.replace('Aa', 'Å'),
)
def previous_and_next(some_iterable):
    """Iterate over *some_iterable* together with each item's neighbours.

    Returns an iterator of ``(previous, item, next)`` tuples, one per item.
    ``previous`` is ``None`` for the first item and ``next`` is ``None`` for
    the last one. An empty iterable yields nothing.
    """
    # Three independent iterators over the same data.
    prevs, items, nexts = tee(some_iterable, 3)
    # Shift one copy back by a slot (leading None) ...
    prevs = chain([None], prevs)
    # ... and one forward by a slot (drop the first item, trailing None).
    nexts = chain(islice(nexts, 1, None), [None])
    # zip stops with the shortest input, which is ``items``.
    return zip(prevs, items, nexts)
# Render the results as a wikitable: one row per home team and, within each
# row, one cell per team in the same order as ``teams`` (which has to match
# the column order hard-coded in WTHEADER).

# Group the formatted results by (home team, away team). Iterating ``games``
# in order keeps several meetings of the same pair in schedule order.
results_by_pairing = {}
for home, away, result, overtime in games:
    if overtime:
        # Append the OT column value in small print.
        result_text = result + WTOT % overtime
    else:
        result_text = result
    results_by_pairing.setdefault((home, away), []).append(result_text)

parts = [WTHEADER]
for team in teams:
    parts.append(WTHOMETEAM % team)
    for opponent in teams:
        if opponent == team:
            # A team never plays itself: emit the greyed-out placeholder.
            parts.append(WTCELL % (team, WTSELFCELL))
            continue
        # All home results against this opponent share one cell. A pairing
        # without any game still gets an (empty) cell so that the following
        # cells stay under the right column header.
        # NOTE: results against an opponent that is not in ``teams`` (i.e. a
        # team without any home game) have no column and are not rendered.
        cell = WTGAMEDIVIDER.join(results_by_pairing.get((team, opponent), []))
        parts.append(WTCELL % (opponent, cell))
parts.append(WTFOOTER)

output = ''.join(parts)
print(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment