Created
August 4, 2018 16:06
-
-
Save cameronlai/3a2475e422fb1c26b74a91162a1daf54 to your computer and use it in GitHub Desktop.
Mapping bus data from data.gov.hk to GTFS format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import datetime | |
import pyodbc | |
from pyproj import Proj, transform | |
import zipfile | |
# File Directories | |
dirMDB = "./data_gov/" | |
output_folder = "./my-feed/" | |
# Coordinate Transform | |
WGS84 = Proj(init='EPSG:4326') | |
HK80 = Proj(init='EPSG:2326') | |
# DB related | |
conn_str_prefix = "Driver={Microsoft Access Driver (*.mdb)};DBQ=" | |
delta_mins = 5 | |
# from company_code.mdb to agency.txt | |
conn_str = conn_str_prefix + dirMDB + "COMPANY_CODE.mdb" | |
with pyodbc.connect(conn_str) as conn: | |
cur = conn.cursor() | |
with open(output_folder + 'agency.txt', 'w') as f: | |
f.write('agency_id,agency_name,agency_url,agency_timezone\n') | |
cur.execute(r"SELECT * FROM COMPANY_CODE") | |
companies = cur.fetchall() | |
for c in companies: | |
f.write('%s,%s,http://www.xxx.com,Asia/Shanghai\n' % (c.COMPANY_CODE, c.COMPANY_NAMEE)) | |
# Load route db | |
# from rstop_bus.mdb to routes.txt | |
route_id_ary = [] | |
conn_str = conn_str_prefix + dirMDB + "ROUTE_BUS.mdb" | |
with pyodbc.connect(conn_str) as conn: | |
cur = conn.cursor() | |
with open(output_folder + 'routes.txt', 'w') as f: | |
f.write('route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color\n') | |
cur.execute(r"SELECT DISTINCT ROUTE_ID, ROUTE_NAMEE, COMPANY_CODE, HYPERLINK_E FROM ROUTE") | |
route = cur.fetchall() | |
for r in route: | |
route_id_ary.append(r.ROUTE_ID) | |
f.write('%d,%s,,%s,,3,%s,,\n' % (r.ROUTE_ID, r.COMPANY_CODE, r.ROUTE_NAMEE, r.HYPERLINK_E)) | |
# from rstop_bus.mdb to trips.txt | |
with open(output_folder + 'trips.txt', 'w') as f: | |
f.write('route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape_id\n') | |
for r in route: | |
for direction in range(2): | |
trip_id = '%d_%d' % (r.ROUTE_ID, direction) | |
f.write('%d,%s,%s,,%d,,\n' % (r.ROUTE_ID, 'FULLW', trip_id, direction)) | |
# Get back rstop id | |
conn_str = conn_str_prefix + dirMDB + "RSTOP_BUS.mdb" | |
rstop_data = [] | |
with pyodbc.connect(conn_str) as conn: | |
conn = pyodbc.connect(conn_str) | |
cur = conn.cursor() | |
execute_str = r"SELECT STOP_ID, MAX(STOP_NAMEE) FROM RSTOP GROUP BY STOP_ID" | |
cur.execute(execute_str) | |
rstop_data = cur.fetchall() # This includes stop id and field name | |
# from stop_bus.mdb to stops.txt | |
conn_str = conn_str_prefix + dirMDB + "STOP_BUS.mdb" | |
with pyodbc.connect(conn_str) as conn: | |
cur = conn.cursor() | |
with open(output_folder + 'stops.txt', 'w') as f: | |
f.write('stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url\n') | |
stop_count = 0 | |
for rstop in rstop_data: | |
stop_id = rstop[0] | |
stop_namee = rstop[1] | |
stop_namee = stop_namee.replace(',', ' ') | |
stop_namee = re.sub(r'[^\x00-\x7f]',r' ',stop_namee) | |
cur.execute(r"SELECT STOP_ID, X, Y FROM STOP WHERE STOP_ID=%d" % stop_id) | |
stop = cur.fetchone() | |
x, y = transform(HK80, WGS84, stop.X, stop.Y) | |
f.write('%d,"%s",,%f,%f,%d,\n' % (stop_id, stop_namee, y, x, stop_count / 3)) | |
stop_count += 1 | |
# from stop_bus.mdb to stop_times.txt | |
conn_str = conn_str_prefix + dirMDB + "RSTOP_BUS.mdb" | |
with pyodbc.connect(conn_str) as conn: | |
cur = conn.cursor() | |
with open(output_folder + 'stop_times.txt', 'w') as f: | |
f.write('trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled\n') | |
start_time = datetime.datetime(2018,1,1,5,30,0) | |
for route_id in route_id_ary: | |
for direction in range(2): | |
trip_id = '%d_%d' % (route_id, direction) | |
stop_count = 0 | |
execute_str = r"SELECT STOP_ID, STOP_SEQ FROM RSTOP WHERE ROUTE_ID = %d AND ROUTE_SEQ = %d ORDER BY STOP_SEQ" % (route_id, direction+1) | |
cur.execute(execute_str) | |
rstop_data = cur.fetchall() | |
for rstop in rstop_data: | |
offset = datetime.timedelta(minutes=5*stop_count) | |
arr_time = (start_time + offset).strftime('%H:%M:%S') | |
offset = datetime.timedelta(minutes=5*stop_count+1) | |
dep_time = (start_time + offset).strftime('%H:%M:%S') | |
f.write('%s,%s,%s,%d,%d,,,,\n' % (trip_id,arr_time,dep_time,rstop.STOP_ID,rstop.STOP_SEQ)) | |
stop_count += 1 | |
# calendar.txt | |
with open(output_folder + 'calendar.txt', 'w') as f: | |
f.write('service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n') | |
f.write('FULLW,1,1,1,1,1,1,1,20180101,20181231\n') | |
# calendar_dates.txt | |
with open(output_folder + 'calendar_dates.txt', 'w') as f: | |
f.write('service_id,date,exception_type\n') | |
f.write('FULLW,20070604,2\n') | |
# fare_attributes.txt | |
conn_str = conn_str_prefix + dirMDB + "FARE_BUS.mdb" | |
fare_list = [] | |
with pyodbc.connect(conn_str) as conn: | |
cur = conn.cursor() | |
with open(output_folder + 'fare_attributes.txt', 'w') as f: | |
f.write('fare_id,price,currency_type,payment_method,transfers,transfer_duration\n') | |
execute_str = r"SELECT DISTINCT PRICE FROM FARE" | |
cur.execute(execute_str) | |
fare_data = cur.fetchall() | |
for i, d in enumerate(fare_data): | |
f.write('%d,%f,HKD,0,0,\n' % (i, d.PRICE)) | |
fare_list.append(d.PRICE) | |
# fare_rules.txt | |
conn_str = conn_str_prefix + dirMDB + "FARE_BUS.mdb" | |
with pyodbc.connect(conn_str) as conn: | |
cur = conn.cursor() | |
with open(output_folder + 'fare_rules.txt', 'w') as f: | |
f.write('fare_id,route_id,origin_id,destination_id,contains_id\n') | |
execute_str = r"SELECT ROUTE_ID, ROUTE_SEQ, ON_SEQ, OFF_SEQ, PRICE FROM FARE ORDER BY ROUTE_ID" | |
cur.execute(execute_str) | |
fare_data = cur.fetchall() | |
for d in fare_data: | |
fare_id = fare_list.index(d.PRICE) | |
f.write('%d,%s,%d,%d,\n' % (fare_id, d.ROUTE_ID, d.ON_SEQ, d.OFF_SEQ)) | |
# Zip to output file | |
with zipfile.ZipFile('my-feed.zip', mode='w') as zf: | |
for f in os.listdir('my-feed'): | |
zf.write(output_folder + f, f) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment