Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from pprint import pprint
from bs4 import BeautifulSoup
import dryscrape
from itertools import tee, islice, chain
# Site root; handed to dryscrape.Session as its base_url.
BASEURL = 'http://metalligaen.dk'
# Path of the schedule page; this is what session.visit() is called with.
RELURRL = '/Home/Schedule'

# --- Wikitext templates used to build the results cross-table ---
# Opens a new table row and emits the home-team name cell (%s = team name).
WTHOMETEAM = '|-\n| style="text-align:left; background:#F2F2F2;"|[[%s]]\n'
# One table cell: first %s is a team name placed in an HTML comment,
# second %s is the cell content.
WTCELL = '| <!-- vs. %s --> %s\n'
# Cell content used where the home team would meet itself.
WTSELFCELL = 'style="background:#F2F2F2;"|–'
# Separator placed between results when one cell holds several games.
WTGAMEDIVIDER = '<br />'
# Suffix appended to a score when the OT column of the game is non-empty
# (%s = the OT column text).
WTOT = '<small>&nbsp;(%s)</small>'
# Table opening plus the header rows. The ten away-team columns and their
# order are hard-coded here; the row-building code sorts teams with 'Aa'
# treated as 'Å' so that Aalborg comes last, presumably to match this order.
WTHEADER = """{| class="wikitable" style="text-align:center;"
! rowspan="2"|Home team
! colspan="10"|Away team
|-
! style="width:70px; background:#EEEE00; color:#222266;"|Esbjerg
! style="width:70px; background:white; color:#00245D;"|Fr.havn
! style="width:70px; background:#0B2161; color:#FE7903;"|Gentofte
! style="width:70px; background:black; color:#FFCF00;"|Herlev
! style="width:70px; background:#1A2738; color:white;"|Herning
! style="width:70px; background:white; color:#832831;"|Odense
! style="width:70px; background:#012FA9; color:#D81F35;"|Rungsted
! style="width:70px; background:#CD2228; color:white;"|Rødovre
! style="width:70px; background:#CCEEFF; color:black;"|Sønderj.
! style="width:70px; background:#C90000; color:black;"|Aalborg
"""
# Closes the wikitable.
WTFOOTER = '|}'
# Load the schedule page in a dryscrape session (image loading disabled) and
# wait until the schedule table is present before grabbing the HTML.
session = dryscrape.Session(base_url = BASEURL)
session.set_attribute('auto_load_images', False)
session.visit(RELURRL)
session.wait_for(lambda: session.at_xpath("//div[@id='schedule']/table"))
response = session.body()

soup = BeautifulSoup(response, 'html.parser')
gamesoup = soup.find(id='schedule').find_all('tr')

# Each entry is [hometeam, awayteam, score, OT], all plain strings.
games = []
for game in gamesoup:
    cols = game.find_all('td')
    # Rows without any <td> cells (presumably header rows) carry no game.
    if not cols:
        continue
    # Column order:
    # date, time, hometeam, awayteam, venue, TV link, score, OT, rapportlink
    hometeam = cols[2].text
    awayteam = cols[3].text
    score = cols[6].text
    OT = cols[7].text
    score = score.replace(' ', '')  # Remove space
    OT = OT.replace('\xa0', '')  # Remove nbsp
    games.append([hometeam, awayteam, score, OT])

# Every team that has at least one home game, sorted by name with 'Aa'
# treated as 'Å' so that Aalborg is moved last.
teams = sorted({game[0] for game in games},
               key=lambda x: x.replace('Aa', 'Å'))
def previous_and_next(some_iterable):
    """Pair every item of *some_iterable* with its neighbours.

    Returns the ``zip`` of three views of the input, producing
    ``(previous, current, next)`` triples in input order. ``previous`` is
    ``None`` for the first item and ``next`` is ``None`` for the last one.
    An empty input produces no triples.
    """
    # Three independent iterators over the same underlying iterable.
    prevs, items, nexts = tee(some_iterable, 3)
    # Shift "previous" one step behind by prepending a None placeholder.
    prevs = chain([None], prevs)
    # Shift "next" one step ahead: drop the first item, pad the end with None.
    nexts = chain(islice(nexts, 1, None), [None])
    return zip(prevs, items, nexts)
# Build the wikitext cross-table: one row per home team, one cell per team
# in `teams` order, with the self-vs-self cell on the diagonal.

# Group the formatted results by (home team, away team). Games are appended
# in the order they appear in `games`, so several meetings of the same pair
# keep their schedule order inside the cell.
results = {}
for hometeam, awayteam, score, OT in games:
    # Append the overtime marker only when the OT column is non-empty.
    gamestring = (score + WTOT % OT) if OT else score
    results.setdefault((hometeam, awayteam), []).append(gamestring)

# Collect the pieces in a list and join once at the end.
rows = [WTHEADER]
for hometeam in teams:
    rows.append(WTHOMETEAM % hometeam)
    for awayteam in teams:
        if awayteam == hometeam:
            cell = WTSELFCELL
        else:
            # A pairing without any games yields an empty cell, so the
            # remaining columns of the row stay aligned with the header.
            cell = WTGAMEDIVIDER.join(results.get((hometeam, awayteam), []))
        rows.append(WTCELL % (awayteam, cell))
    # NOTE: games against an away team that is not in `teams` (i.e. one
    # that never appears as a home team) have no column and are not shown.
rows.append(WTFOOTER)

output = ''.join(rows)
print(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.