Skip to content

Instantly share code, notes, and snippets.

@ian-weisser
Last active February 14, 2019 02:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ian-weisser/9994498 to your computer and use it in GitHub Desktop.
Save ian-weisser/9994498 to your computer and use it in GitHub Desktop.
Parse a GTFS file for stops near a location
#!/usr/bin/python3
"""
Locate stop_id candidates from a GTFS stops file.
This is a helper application. You use it to discover a list
of possible stop_ids for your *real* application. It's handy
during debugging, or during setup of an application.
Example: You know an intersection name or a lat/lon pair or
an existing stop_id, and you want the list of stops nearby.
Requires python3, and a zipped GTFS file to read stop data from.
Output data includes the raw GTFS stops.txt line.
USAGE: (see -h for the full list)
$ ./stop_finder /path/to/GTFS/zipfile --names "oklahoma howell"
$ ./stop_finder /path/to/GTFS/zipfile --latlon 42.9883466 -87.9041176
$ ./stop_finder /path/to/GTFS/zipfile --stop 5152
EXAMPLE:
Let's look for the stops at the corner of Oklahoma & Howell:
$ ./stop_finder /path/to/my_file.gtfs --names "oklahoma howell"
Looking near LAT=42.988313549999994, LON=-87.90440945
Dist Stop
------- --------------------------------------------------
13 709,709,HOWELL & OKLAHOMA,, 42.9882051, -87.9043319,,,1
19 5068,5068,OKLAHOMA & HOWELL,, 42.9883466, -87.9041176,,,1
23 5152,5152,OKLAHOMA & HOWELL,, 42.9881561, -87.9046550,,,1
27 658,658,HOWELL & OKLAHOMA,, 42.9885464, -87.9045333,,,1
149 5069,5069,OKLAHOMA & AUSTIN,, 42.9883253, -87.9066868,,,0
153 5153,5153,OKLAHOMA & QUINCY,, 42.9881773, -87.9020859,,,0
181 5151,5151,OKLAHOMA & AUSTIN,, 42.9881666, -87.9071558,,,0
185 5067,5067,OKLAHOMA & QUINCY,, 42.9883465, -87.9015866,,,0
323 5154,5154,OKLAHOMA & PINE,, 42.9881953, -87.8994944,,,0
350 5066,5066,OKLAHOMA & PINE,, 42.9883965, -87.8990949,,,0
The first four results are within a few meters of each other, and
all at the same intersection. The next closest stop is 150 meters
away. The first four stops are the stops we want.
Results are always in meters, and include *all* nearby stops in
order of distance (nearest first).
"""
import argparse
import math
import sys
import zipfile
def parse_command_line():
    """
    Build the argument parser and parse the process command line.

    One positional argument is expected (the path to the GTFS zipfile),
    followed by at most one of three mutually exclusive search modes:
      -n / --names / --strings        one quoted string of search words
      -l / --latlon / --coordinates   a latitude and a longitude
      -x / --stop / --known-stop      an already-known stop_id

    Returns the argparse namespace. Search modes that were not given on
    the command line are left as None.
    """
    # Each entry: (option strings, keyword arguments for add_argument)
    search_modes = (
        (('-n', '--names', '--strings'),
         {'nargs': 1,
          'metavar': '"STRING1 STRING2"',
          'help': 'string search (use quotes for multiple strings)'}),
        (('-l', '--latlon', '--coordinates'),
         {'nargs': 2,
          'metavar': ('LAT.AAAA', 'LON.BBBB'),
          'help': 'lat/lon search'}),
        (('-x', '--stop', '--known-stop'),
         {'nargs': 1,
          'metavar': 'STOP_ID',
          'help': 'known stop_id'}),
    )

    cli = argparse.ArgumentParser()
    cli.add_argument('GTFS_zipfile', metavar='zipfile',
                     help='path to GTFS zipfile')

    # Only one kind of search may be requested per run
    exclusive = cli.add_mutually_exclusive_group()
    for flags, options in search_modes:
        exclusive.add_argument(*flags, **options)

    return cli.parse_args()
def read_stops_zipfile(path):
    """
    Read stops.txt from a GTFS zipfile and return it as a list of lines.

    path: filesystem path to the GTFS zip archive.

    Returns a list of strings, one per line of stops.txt, without line
    terminators. The header row is included, and a file that ends with a
    newline yields a final empty string (callers skip empty lines).

    Exits the program with an error message if the path is not a zip
    archive or the archive has no stops.txt member.
    """
    if not zipfile.is_zipfile(path):
        sys.exit("error: Not a valid GTFS zipfile")

    # The context manager closes the archive even if reading fails
    with zipfile.ZipFile(path, mode='r') as gtfs:
        try:
            raw = gtfs.read('stops.txt')
        except KeyError:
            sys.exit("error: GTFS zipfile does not contain stops.txt")

    # utf-8-sig drops an optional byte-order mark so it cannot end up
    # glued to the first header field
    text = raw.decode('utf-8-sig')

    # Feeds are published with CRLF, LF, or (rarely) CR line endings;
    # normalize them all so each record becomes its own list entry
    return text.replace('\r\n', '\n').replace('\r', '\n').split('\n')
def string_matching(all_stops, search_strings):
    """
    Return the list of stops that match the *most* search terms.

    all_stops: list of raw stops.txt lines.
    search_strings: one-element list whose first item holds the search
        terms separated by whitespace (this is how argparse delivers
        the --names option).

    Matching is a case-insensitive substring test against the third
    comma-separated field of each line (the stop name). Lines with fewer
    than seven fields and the header row are ignored. Stops matching no
    term at all are never returned, so the result may be empty.

    Example: 'howell lincoln kk'. In Milwaukee, no stops will match
    all three terms. Six stops will match two of the three terms,
    and this function will return a list of those six stops.
    """
    # split() without an argument discards the empty strings produced by
    # doubled, leading or trailing spaces; an empty term would otherwise
    # "match" every stop
    terms = [term.upper() for term in search_strings[0].split()]

    matches = []
    highest_match = 0
    for stop_line in all_stops:
        # The header row is not a stop (same test nearby_stops uses)
        if 'stop_id' in stop_line:
            continue
        fields = stop_line.split(',')
        if len(fields) < 7:
            continue

        name = fields[2].upper()
        score = sum(term in name for term in terms)
        if score == 0 or score < highest_match:
            continue
        if score > highest_match:
            # A better match supersedes everything collected so far
            highest_match = score
            matches = []
        matches.append(stop_line)
    return matches
def xy_distances(latitude):
    """
    Return the approximate length in meters of one degree of longitude
    and one degree of latitude at the given latitude.

    latitude: latitude in decimal degrees (a number or numeric string).

    Returns (x_deg, y_deg):
      x_deg  meters per degree of longitude (east-west) at that latitude,
             truncated to an int
      y_deg  meters per degree of latitude (north-south), a constant

    Callers turn a distance into a coordinate bracket with these values.
    Example: at 45 deg one degree of longitude is roughly 78,700m by the
    spherical formula used here, so 500m is about 500/78700 = 0.00635 deg
    and a 500m bracket would be longitude +/- 0.00635.

    The length of one degree of latitude varies by only about 1000m/deg
    (1%) between equator and pole, so a constant is used for it.
    The length of one degree of longitude depends on the latitude.
    It's 111320m at the equator, and 0 at the poles.
    See http://en.wikipedia.org/wiki/
    Length_of_a_degree_of_longitude#Length_of_a_degree_of_longitude
    """
    y_deg = 111132

    # math.cos() works in radians, so the latitude must be converted
    # from degrees first
    lat_radians = math.radians(float(latitude))
    x_deg = int(math.pi * 6378137 * math.cos(lat_radians) / 180)

    # abs() keeps the result non-negative even for out-of-range input
    return abs(x_deg), y_deg
def center(list_of_stops):
    """
    Calculate the approximate center of a list of stops.

    list_of_stops: raw stops.txt lines; the fifth and sixth
        comma-separated fields are read as latitude and longitude.

    Returns (average latitude, average longitude) as floats.
    """
    lat_total = 0.0
    lon_total = 0.0
    for record in list_of_stops:
        fields = record.split(',')
        lat_total += float(fields[4])
        lon_total += float(fields[5])

    count = len(list_of_stops)
    return lat_total / count, lon_total / count
def dist(ax, ay, bx, by, x_deg, y_deg):
    """
    Approximate distance in meters between points a and b.

    ax, bx: x coordinates of the two points (the caller in this file
        passes longitudes in degrees).
    ay, by: y coordinates of the two points (latitudes in degrees).
    x_deg, y_deg: scale factors applied to the x and y differences
        (meters per degree).

    Returns the straight-line distance truncated to an int.
    """
    # Scale each coordinate gap to meters, then apply Pythagoras
    x_dist = abs(ax - bx) * x_deg
    y_dist = abs(ay - by) * y_deg
    return int(math.sqrt(x_dist**2 + y_dist**2))
def nearby_stops(list_of_stops, center_lat, center_lon):
    """
    Print the stops within 500 meters of a point, closest first.

    list_of_stops: raw stops.txt lines (header and blank lines allowed).
    center_lat, center_lon: the search point, in decimal degrees.

    Each output line is the distance in meters, a tab, and the raw
    stops.txt line. At most 20 stops are printed. Stops at the same
    distance keep their file order. Lines that are too short or whose
    coordinates are not numeric are ignored. Returns None.
    """
    maximum_distance = 500.0
    list_max_length = 20

    # Scale longitude at the latitude being searched, rather than at
    # whatever stop happens to be first in the file
    x_deg, y_deg = xy_distances(center_lat)
    x_dist = maximum_distance / x_deg
    y_dist = maximum_distance / y_deg

    candidates = []
    for stop in list_of_stops:
        if 'stop_id' in stop:       # header row
            continue
        fields = stop.split(',')
        if len(fields) < 6:         # blank or truncated line
            continue
        try:
            latitude = float(fields[4])
            longitude = float(fields[5])
        except ValueError:
            continue

        # Don't bother to calculate distances for stops that
        # are obviously outside the maximum_distance box
        if abs(longitude - center_lon) > x_dist \
                or abs(latitude - center_lat) > y_dist:
            continue

        distance = dist(center_lon, center_lat,
                        longitude, latitude, x_deg, y_deg)
        # The box is only a cheap pre-filter; its corners lie further
        # away than maximum_distance
        if distance > maximum_distance:
            continue
        candidates.append((distance, stop))

    # Sorting on distance alone is stable, so ties stay in file order
    candidates.sort(key=lambda pair: pair[0])
    for distance, stop in candidates[:list_max_length]:
        print(distance, "\t", stop)
    return
def run():
    """
    Locate and arrange the stop_ids from a GTFS stops file.

    Reads the command line, loads stops.txt from the zipfile, works out
    a search point from the chosen search mode, and prints the nearby
    stops:
      --names   center of the stops whose names best match the terms
      --latlon  the coordinates given
      --stop    the location of the stop with the given stop_id

    Exits with an error message when the search cannot produce a point
    (no matching name, non-numeric coordinates, unknown stop_id, or no
    search mode given). A name search matching more than six stops
    lists them and exits, asking for a narrower search.
    """
    arg = parse_command_line()
    all_stops = read_stops_zipfile(arg.GTFS_zipfile)

    if arg.names is not None:       # Names, like streets or 'airport'
        some_stops = string_matching(all_stops, arg.names)
        if not some_stops:
            # center() cannot average an empty list
            sys.exit("error: No stops match {!r}".format(arg.names[0]))
        if len(some_stops) > 6:
            print("Try narrowing your results with another term:")
            for stop in some_stops:
                print(" {}".format(stop))
            sys.exit()
        latitude, longitude = center(some_stops)

    elif arg.latlon is not None:    # lat/lon coordinates
        try:
            latitude = float(arg.latlon[0])
            longitude = float(arg.latlon[1])
        except ValueError:
            sys.exit("error: LAT and LON must be numbers")

    elif arg.stop is not None:      # known stop
        latitude = None
        for stop in all_stops:
            fields = stop.split(',')
            if len(fields) < 7:
                continue
            if fields[0] == arg.stop[0]:
                latitude = float(fields[4])
                longitude = float(fields[5])
                break
        if latitude is None:
            sys.exit("error: stop_id {} not found in stops.txt".format(
                arg.stop[0]))

    else:
        sys.exit("error: Choose a search: --names, --latlon, or --stop "
                 "(see -h)")

    print("Looking near LAT={}, LON={}".format(latitude, longitude))
    print("Dist Stop")
    print("------- --------------------------------------------------")
    nearby_stops(all_stops, latitude, longitude)
# Run the stop finder only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment