copy/gist:f0642d4782110823ffdb Secret

## gistfile1.py
#!/usr/bin/python

import re
import urllib
import time
import HTMLParser
import os

'''try:
	os.mkdir('/tmp/bahn/')
except:
	pass'''

# Departure board ("boardType=dep") query for "Wattenscheid-H%F6ntrop",
# restricted to trains named "S1".  The number after "%23" is presumably the
# station id -- verify against the bahn.de API before changing it.
url = "http://reiseauskunft.bahn.de/bin/bhftafel.exe/dn?ld=&country=DEU&rt=1&input=Wattenscheid-H%F6ntrop%238006227&boardType=dep&time=actual&productsFilter=11111&REQTrain_name=S1&start=yes"

# Download the raw HTML.  try/finally releases the connection even when
# read() fails part-way through.
handle = urllib.urlopen(url)
try:
	source = handle.read()
finally:
	handle.close()

# Parsed departures, filled by the parsing loop further down.
next_trains = []
# Shared parser instance; remove_html() uses it to unescape HTML entities.
parser = HTMLParser.HTMLParser()

def remove_html(str):
	"""Turn an HTML fragment into plain, transliterated text.

	Each tag is replaced by a single space, entities are unescaped with the
	module-level ``parser`` and the result is stripped.  German umlauts are
	then spelled out ("ue", "Ue", "ae", "Ae", "oe", "Oe") and every run of
	spaces/tabs is collapsed into one space.  Newlines inside the text are
	kept.
	"""
	# (umlaut, replacement) pairs, applied in this order.
	umlauts = (
		(u'\xfc', 'ue'),
		(u'\xdc', 'Ue'),
		(u'\xe4', 'ae'),
		(u'\xc4', 'Ae'),
		(u'\xf6', 'oe'),
		(u'\xD6', 'Oe'),
	)

	without_tags = re.sub('<[^>]*>', ' ', str)
	text = parser.unescape(without_tags).strip()
	for umlaut, spelled_out in umlauts:
		text = text.replace(umlaut, spelled_out)

	return re.sub('[ \t]+', ' ', text)


def abbreviate_station(str):
	"""Abbreviate long station names.

	"Wattenscheid" becomes "Wat", "Duesseldorf" becomes "D'dorf" and every
	" Hbf" is removed.
	"""
	# (long form, short form) pairs, applied in this order.
	shortcuts = (
		('Wattenscheid', 'Wat'),
		('Duesseldorf', 'D\'dorf'),
		(' Hbf', ''),
	)
	name = str
	for long_form, short_form in shortcuts:
		name = name.replace(long_form, short_form)
	return name

def abbreviate_status(str):
	"""Shorten a status text.

	Removes every "ca." together with the two characters that follow it and
	replaces " , Grund:" with ";".  Returns the shortened text.
	"""
	# Raw string: in a plain literal "\." is an invalid escape sequence that
	# newer Python versions warn about.  The pattern itself is unchanged:
	# a literal "ca." followed by any two characters.
	status = re.sub(r'ca\...', '', str)
	return status.replace(' , Grund:', ';')


for single_train in source.split('<td class="time">')[2:-1]:
	time_str = re.search(r'(\d\d:\d\d)', single_train)
	dest_html = re.search(r'<td class="route">(.*?)</td>', single_train, re.S)
	platform_html = re.search(r'<td class="platform">(.*?)</td>', single_train, re.S)
	status_html = re.search(r'<td class="ris">(.*?)</td>', single_train, re.S)

	#print platform_html
	#print single_train

	if not all([time_str, dest_html, platform_html]):
		print '= INVALID ENTRY ='
		#print single_train
		print time_str
		print dest_html
		print platform_html
		print status_html
		print ''
		continue
		#raise Exception('Parse error: not all match')

	if not status_html:
		status = '(no status)'
	else:
		status = remove_html(status_html.group(1))

	arrival_time = time.strptime(time_str.group(1), '%H:%M')
	dest, stations = remove_html(dest_html.group(1)).split('\n', 1)
	platform = remove_html(platform_html.group(1))

	next_trains.append( (arrival_time, dest, stations, platform, status) )


	#print next_trains
file = open('/tmp/bahn', 'w+')

if len(next_trains) > 1:

	dest_max_width = max(len(abbreviate_station(dest)) for _, dest, _, _, _ in next_trains)

	for arrival, dest, stations, platform, status in next_trains:

		line = ''

		line += time.strftime('%H:%M', arrival) + ' | '
		line += (abbreviate_station(dest).rjust(dest_max_width).encode('utf8')) + ' | '
		#line += platform.encode('utf8') + ' | '
		line += abbreviate_status(status.encode('utf8')) + '\n'

		file.write(line)
		print line,
else:
	file.write("No trains found")


file.close()
	#!/usr/bin/python

	import re
	import urllib
	import time
	import HTMLParser
	import os

	'''try:
	os.mkdir('/tmp/bahn/')
	except:
	pass'''

	# Departure board ("boardType=dep") query for "Wattenscheid-H%F6ntrop",
	# restricted to trains named "S1".  The number after "%23" is presumably
	# the station id -- verify against the bahn.de API before changing it.
	url = "http://reiseauskunft.bahn.de/bin/bhftafel.exe/dn?ld=&country=DEU&rt=1&input=Wattenscheid-H%F6ntrop%238006227&boardType=dep&time=actual&productsFilter=11111&REQTrain_name=S1&start=yes"

	# Download the raw HTML.  try/finally releases the connection even when
	# read() fails part-way through.
	handle = urllib.urlopen(url)
	try:
		source = handle.read()
	finally:
		handle.close()

	# Parsed departures, filled by the parsing loop further down.
	next_trains = []
	# Shared parser instance; remove_html() uses it to unescape HTML entities.
	parser = HTMLParser.HTMLParser()

	def remove_html(str):
		"""Turn an HTML fragment into plain, transliterated text.

		Each tag is replaced by a single space, entities are unescaped with
		the module-level ``parser`` and the result is stripped.  German
		umlauts are then spelled out ("ue", "Ue", "ae", "Ae", "oe", "Oe") and
		every run of spaces/tabs is collapsed into one space.  Newlines inside
		the text are kept.
		"""
		# remove html tags, unescape entities and trim
		str = parser.unescape(re.sub('<[^>]*>', ' ', str))
		str = str.strip()
		# spell out umlauts
		str = str.replace(u'\xfc', 'ue')
		str = str.replace(u'\xdc', 'Ue')
		str = str.replace(u'\xe4', 'ae')
		str = str.replace(u'\xc4', 'Ae')
		str = str.replace(u'\xf6', 'oe')
		str = str.replace(u'\xD6', 'Oe')

		# collapse runs of spaces and tabs
		str = re.sub('[ \t]+', ' ', str)

		return str


	def abbreviate_station(str):
		"""Abbreviate long station names.

		"Wattenscheid" becomes "Wat", "Duesseldorf" becomes "D'dorf" and every
		" Hbf" is removed.
		"""
		str = str.replace('Wattenscheid', 'Wat')
		str = str.replace('Duesseldorf', 'D\'dorf')

		str = str.replace(' Hbf', '')
		return str

	def abbreviate_status(str):
		"""Shorten a status text.

		Removes every "ca." together with the two characters that follow it
		and replaces " , Grund:" with ";".  Returns the shortened text.
		"""
		# Raw string: in a plain literal "\." is an invalid escape sequence
		# that newer Python versions warn about.  The pattern is unchanged:
		# a literal "ca." followed by any two characters.
		str = re.sub(r'ca\...', '', str)
		str = str.replace(' , Grund:', ';')

		return str



	for single_train in source.split('<td class="time">')[2:-1]:
	time_str = re.search(r'(\d\d:\d\d)', single_train)
	dest_html = re.search(r'<td class="route">(.*?)</td>', single_train, re.S)
	platform_html = re.search(r'<td class="platform">(.*?)</td>', single_train, re.S)
	status_html = re.search(r'<td class="ris">(.*?)</td>', single_train, re.S)

	#print platform_html
	#print single_train

	if not all([time_str, dest_html, platform_html]):
	print '= INVALID ENTRY ='
	#print single_train
	print time_str
	print dest_html
	print platform_html
	print status_html
	print ''
	continue
	#raise Exception('Parse error: not all match')

	if not status_html:
	status = '(no status)'
	else:
	status = remove_html(status_html.group(1))

	arrival_time = time.strptime(time_str.group(1), '%H:%M')
	dest, stations = remove_html(dest_html.group(1)).split('\n', 1)
	platform = remove_html(platform_html.group(1))

	next_trains.append( (arrival_time, dest, stations, platform, status) )


	#print next_trains
	file = open('/tmp/bahn', 'w+')

	if len(next_trains) > 1:

	dest_max_width = max(len(abbreviate_station(dest)) for _, dest, _, _, _ in next_trains)

	for arrival, dest, stations, platform, status in next_trains:

	line = ''

	line += time.strftime('%H:%M', arrival) + ' \| '
	line += (abbreviate_station(dest).rjust(dest_max_width).encode('utf8')) + ' \| '
	#line += platform.encode('utf8') + ' \| '
	line += abbreviate_status(status.encode('utf8')) + '\n'

	file.write(line)
	print line,
	else:
	file.write("No trains found")


	file.close()