Skip to content

Instantly share code, notes, and snippets.

@tomverran
Last active September 15, 2016 21:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomverran/85ace6720bd6cf7af0e8f747c82c0a17 to your computer and use it in GitHub Desktop.
Save tomverran/85ace6720bd6cf7af0e8f747c82c0a17 to your computer and use it in GitHub Desktop.
National Rail FTP timetable downloader
import xml.etree.cElementTree as ET
from ftplib import FTP
import StringIO
import gzip
import time
import re
import json
import boto3
import tempfile
# Download the current National Rail timetable over FTP, find the journeys
# whose first departure or last arrival matches one of the keys in `tofind`,
# and publish the matching journey uids to S3 as a JSON list.

client = boto3.client('s3')
s3 = boto3.resource('s3')

# The FTP password is kept in a private S3 object rather than in the script.
resp = client.get_object(Bucket='tv-private', Key='national-rail-ftp.txt')
password = resp["Body"].read().decode('utf-8')

# Keys have the form '<tpl> -> <ptd>' (matched against a journey's first
# child) or '<tpl> <- <pta>' (matched against its last child).
# NOTE(review): tpl/ptd/pta are presumably the timing point and the public
# departure/arrival times -- confirm against the feed schema.
tofind = [
    'BCKNHMJ <- 08:32',
    'BCKNHMJ -> 08:41',
    'BCKNHMJ <- 08:19'
]
wanted = set(tofind)
ids = []

# The temp file and the gzip reader are context-managed so they are released
# even if the download or the parse fails part-way through.
with tempfile.TemporaryFile() as timetable_gz:
    ftp = FTP('datafeeds.nationalrail.co.uk')
    try:
        ftp.login(user='ftpuser', passwd=password)

        files = []
        ftp.retrlines('NLST', files.append)

        # Take the first listed file whose name ends with 'v8.xml.gz'.
        filename = next((f for f in files if f.endswith('v8.xml.gz')), None)
        if filename is None:
            raise RuntimeError(
                'no file ending in v8.xml.gz found on the FTP server')

        ftp.retrbinary('RETR ' + filename, timetable_gz.write)
    finally:
        # Drop the connection as soon as the download is done (or has
        # failed); it is not needed while parsing.
        ftp.close()

    # Rewind so the gzip reader starts from the beginning of the download.
    timetable_gz.seek(0)

    with gzip.GzipFile(fileobj=timetable_gz, mode='rb') as timetable:
        # iterparse streams the document, yielding each element once it has
        # been fully read, so the whole tree is never held in memory.
        for _event, elem in ET.iterparse(timetable):
            # Tags may carry a namespace prefix, hence the suffix match.
            if not elem.tag.endswith('Journey'):
                continue

            children = list(elem)
            if children:  # a childless Journey has no calling points
                first, last = children[0], children[-1]
                start, departs = first.get('tpl'), first.get('ptd')
                end, arrives = last.get('tpl'), last.get('pta')

                if start and end and departs and arrives:
                    skey = start + ' -> ' + departs
                    dkey = end + ' <- ' + arrives
                    if skey in wanted or dkey in wanted:
                        ids.append(elem.get('uid'))

            # Free the processed journey's children to bound memory use.
            # clear() also wipes attributes, so it must only run once the
            # Journey itself has been inspected, never on its children first.
            # NOTE(review): the original indentation was lost; per-Journey
            # placement of this call is assumed -- confirm.
            elem.clear()

# Publish the matching journey uids as a JSON array.
s3.Object('tv-timetables', 'trains.json').put(Body=json.dumps(ids))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment