Skip to content

Instantly share code, notes, and snippets.

@copy
Created March 25, 2013 19:32
Show Gist options
  • Save copy/f0642d4782110823ffdb to your computer and use it in GitHub Desktop.
Save copy/f0642d4782110823ffdb to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import re
import urllib
import time
import HTMLParser
import os
# Request URL for the bahn.de station board: departures (boardType=dep) at
# Wattenscheid-Hoentrop (the "%F6" in the query is a Latin-1 encoded o-umlaut),
# restricted to trains named S1.
url = "http://reiseauskunft.bahn.de/bin/bhftafel.exe/dn?ld=&country=DEU&rt=1&input=Wattenscheid-H%F6ntrop%238006227&boardType=dep&time=actual&productsFilter=11111&REQTrain_name=S1&start=yes"

# Download the page. try/finally guarantees the connection is closed even
# when read() raises, instead of leaking the handle.
handle = urllib.urlopen(url)
try:
    source = handle.read()
finally:
    handle.close()

# Parsed departures, filled by the parsing loop further down. Each entry is a
# tuple (arrival_time, dest, stations, platform, status).
next_trains = []

# Shared parser instance; remove_html() uses its unescape() method to resolve
# HTML entities.
parser = HTMLParser.HTMLParser()
def remove_html(str):
    """Reduce an HTML fragment to plain text with transliterated umlauts.

    Steps, in order: every tag is replaced by a single space, HTML entities
    are unescaped with the module-level ``parser``, surrounding whitespace is
    stripped, German umlauts are rewritten as two ASCII letters
    (ue, Ue, ae, Ae, oe, Oe), and runs of spaces/tabs collapse to one space.
    Newlines inside the text are kept.

    Parameters:
        str: the HTML fragment, as a byte string or a text string.

    Returns:
        The cleaned-up text.
    """
    text = str
    # Byte strings are decoded first. Without this, any non-ASCII byte makes
    # the umlaut replacements below fail with UnicodeDecodeError on Python 2,
    # because replacing with a unicode pattern triggers an implicit ASCII
    # decode of the whole string.
    # NOTE(review): ISO-8859-1 is assumed because the request URL encodes
    # o-umlaut as %F6 -- confirm against the charset of the response.
    if isinstance(text, bytes):
        text = text.decode('iso-8859-1')

    # remove html tags, unescape entities and trim
    text = parser.unescape(re.sub(r'<[^>]*>', ' ', text))
    text = text.strip()

    # Transliterate umlauts so the result stays readable as plain ASCII.
    for umlaut, ascii_form in (
            (u'\xfc', 'ue'),
            (u'\xdc', 'Ue'),
            (u'\xe4', 'ae'),
            (u'\xc4', 'Ae'),
            (u'\xf6', 'oe'),
            (u'\xD6', 'Oe'),
    ):
        text = text.replace(umlaut, ascii_form)

    # Collapse horizontal whitespace only; newlines are left in place because
    # the caller splits the route cell on the first newline.
    return re.sub('[ \t]+', ' ', text)
def abbreviate_station(str):
    """Return the station name with long, frequent parts shortened.

    'Wattenscheid' becomes 'Wat', 'Duesseldorf' becomes "D'dorf" and every
    ' Hbf' is dropped. The substitutions are applied in that order.
    """
    shortened = str
    for long_form, short_form in (
            ('Wattenscheid', 'Wat'),
            ('Duesseldorf', 'D\'dorf'),
            (' Hbf', ''),
    ):
        shortened = shortened.replace(long_form, short_form)
    return shortened
def abbreviate_status(str):
    """Shorten a status text for display.

    Every occurrence of 'ca.' is removed together with the two characters
    that follow it, and ' , Grund:' is replaced by ';'.

    Parameters:
        str: the status text.

    Returns:
        The shortened status text.
    """
    # Raw string: '\.' is a regex escape, not a string escape. As a plain
    # literal it is an invalid escape sequence that newer Python versions
    # warn about.
    # NOTE(review): the two trailing dots match ANY two characters (except
    # newline) after 'ca.' -- confirm that is the intended amount to strip.
    shortened = re.sub(r'ca\...', '', str)
    return shortened.replace(' , Grund:', ';')
# Parse the departure table: the page is cut at every '<td class="time">'
# cell, so each chunk holds the cells of one table row.
# NOTE(review): the [2:-1] slice discards the first two chunks and the last
# one; presumably page preamble/header and a trailing non-departure row --
# confirm against the live markup.
for single_train in source.split('<td class="time">')[2:-1]:
    time_str = re.search(r'(\d\d:\d\d)', single_train)
    dest_html = re.search(r'<td class="route">(.*?)</td>', single_train, re.S)
    platform_html = re.search(r'<td class="platform">(.*?)</td>', single_train, re.S)
    status_html = re.search(r'<td class="ris">(.*?)</td>', single_train, re.S)

    # Time, route and platform are mandatory. If one is missing, dump the
    # match objects for debugging and skip the row.
    if not all([time_str, dest_html, platform_html]):
        print('= INVALID ENTRY =')
        print(time_str)
        print(dest_html)
        print(platform_html)
        print(status_html)
        print('')
        continue

    # The status cell is optional.
    status = remove_html(status_html.group(1)) if status_html else '(no status)'
    arrival_time = time.strptime(time_str.group(1), '%H:%M')

    # The route cell holds the destination on its first line and the
    # intermediate stations after it. partition() yields an empty station
    # list when there is no newline, where split('\n', 1) unpacked into two
    # names would raise ValueError.
    dest, _, stations = remove_html(dest_html.group(1)).partition('\n')
    platform = remove_html(platform_html.group(1))

    next_trains.append((arrival_time, dest, stations, platform, status))
# Write the departure board to /tmp/bahn and echo every row to stdout.
# The with-block closes the file even if formatting a row raises.
with open('/tmp/bahn', 'w+') as board:
    # Any non-empty list counts as "trains found"; the previous "> 1" check
    # reported "No trains found" when exactly one train had been parsed.
    if next_trains:
        # Width of the longest abbreviated destination, used to right-align
        # the destination column.
        dest_max_width = max(len(abbreviate_station(dest))
                             for _, dest, _, _, _ in next_trains)
        # stations and platform are parsed but not part of the output row.
        for arrival, dest, stations, platform, status in next_trains:
            row = ' | '.join([
                time.strftime('%H:%M', arrival),
                abbreviate_station(dest).rjust(dest_max_width).encode('utf8'),
                abbreviate_status(status.encode('utf8')),
            ])
            board.write(row + '\n')
            print(row)
    else:
        board.write("No trains found")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment