Skip to content

Instantly share code, notes, and snippets.

@lmeulen
Last active April 3, 2021 11:51
Show Gist options
  • Save lmeulen/003e068ede493d52311ca2a5d510523b to your computer and use it in GitHub Desktop.
Save lmeulen/003e068ede493d52311ca2a5d510523b to your computer and use it in GitHub Desktop.
crowdedness_parse_gtfs
def read_csv(csvfile):
    """Read one table of the zipped GTFS feed into a DataFrame.

    Opens ``gtfs-nl.zip`` inside ``GTFSDIR`` and parses the archive member
    named *csvfile* with ``pandas.read_csv``.

    Args:
        csvfile: Name of the member inside the archive, e.g. ``'agency.txt'``.

    Returns:
        The parsed contents as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If the archive itself does not exist.
        KeyError: If the archive has no member named *csvfile*.
    """
    archive_path = os.path.join(GTFSDIR, 'gtfs-nl.zip')
    # Both handles are context-managed so the archive is closed again even
    # when parsing fails.
    with zipfile.ZipFile(archive_path) as z:
        with z.open(csvfile) as f:
            return pd.read_csv(f)
# Operators to keep; matched against the agency_name column of agency.txt.
AGENCYNAMES = ['NS']
agencies = read_csv('agency.txt')
# Filter the rows once, then derive both the ID array and the trimmed table
# from the filtered frame.
agencies = agencies[agencies['agency_name'].isin(AGENCYNAMES)]
agency_ids = agencies['agency_id'].values
agencies = agencies[['agency_id', 'agency_name']]
# Load all routes and keep only those operated by the selected agencies.
routes = read_csv('routes.txt')
routes = routes.loc[routes['agency_id'].isin(agency_ids)]
# IDs of the remaining routes, used to filter trips.txt.
route_ids = routes['route_id'].values
# Drop every column that is not listed here.
routes = routes[[
    'route_id',
    'agency_id',
    'route_short_name',
    'route_long_name',
    'route_type',
]]
# Load all trips and keep only those that run on the selected routes.
trips = read_csv('trips.txt')
trips = trips[trips['route_id'].isin(route_ids)]
# IDs used to filter stop_times.txt and calendar_dates.txt respectively.
trip_ids = trips['trip_id'].values
service_ids = trips['service_id'].values
# .copy() makes the trimmed frame independent of the frame it was sliced
# from, so the dtype conversions below do not raise pandas'
# SettingWithCopyWarning.
trips = trips[['route_id', 'service_id', 'trip_id', 'trip_headsign',
               'trip_short_name', 'trip_long_name', 'direction_id',
               'shape_id']].copy()
# NOTE: astype(int) raises if any trip_short_name is missing or non-numeric.
trips['trip_short_name'] = trips['trip_short_name'].astype(int)
# Nullable integer dtype: missing shape IDs stay missing instead of forcing
# the column to float.
trips['shape_id'] = trips['shape_id'].astype('Int64')
# Load the service calendar and keep only the services of the selected trips.
calendar = read_csv('calendar_dates.txt')
# Boolean indexing returns a new frame rather than filtering in place, so the
# result has to be assigned; .copy() keeps the column assignment below free
# of SettingWithCopyWarning.
calendar = calendar[calendar['service_id'].isin(service_ids)].copy()
calendar['date'] = calendar['date'].astype(str)
# Default inner join on service_id: every trip row is repeated once for each
# calendar row of its service, and trips whose service has no calendar row
# are dropped.
trips = trips.merge(calendar[['service_id', 'date']], on='service_id')
# Load the stop times and keep only the rows of the selected trips.
stoptimes = read_csv('stop_times.txt')
# Select rows and columns in a single step and take one explicit copy, so the
# column assignments below operate on an independent frame (no
# SettingWithCopyWarning) and the table is duplicated only once.
stoptimes = stoptimes.loc[
    stoptimes['trip_id'].isin(trip_ids),
    ['trip_id', 'stop_sequence', 'stop_id', 'arrival_time',
     'departure_time', 'shape_dist_traveled'],
].copy()
# stop_id is compared as a string against stops.txt further down.
stoptimes['stop_id'] = stoptimes['stop_id'].astype(str)
stop_ids = stoptimes['stop_id'].unique()
# str2min is defined elsewhere; presumably it converts a GTFS time string to
# minutes - confirm against its definition.
stoptimes['arrival_time'] = stoptimes['arrival_time'].apply(str2min)
stoptimes['departure_time'] = stoptimes['departure_time'].apply(str2min)
# First get the stops (platforms)
stops_full = read_csv('stops.txt')
stops_full['stop_id'] = stops_full['stop_id'].astype(str)
stops = stops_full[stops_full['stop_id'].isin(stop_ids)]
# Now add the stopareas (stations)
stopareas = stops['parent_station'].unique()
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and likewise keeps the original row index and returns a new
# frame.
stops = pd.concat([stops, stops_full[stops_full['stop_id'].isin(stopareas)]])
# Strip the literal 'IFF:' prefix; regex=False makes the literal match
# explicit regardless of the pandas version's default.
stops['zone_id'] = (
    stops['zone_id'].str.replace('IFF:', '', regex=False).str.upper()
)
stops['stop_code'] = stops['stop_code'].str.upper()
# .copy() so the .loc assignments below write to an independent frame.
stops = stops[['stop_id', 'stop_name', 'stop_lat', 'stop_lon',
               'parent_station', 'platform_code', 'stop_code',
               'zone_id']].copy()
# Fill each code from the other where one of the two is missing; rows where
# both are missing stay missing.
stops.loc[stops['zone_id'].isnull(), 'zone_id'] = stops['stop_code']
stops.loc[stops['stop_code'].isnull(), 'stop_code'] = stops['zone_id']
# Load shapes.txt from the zipped feed through read_csv, unfiltered.
# (A previous variant, presumably for an unpacked feed, read the file straight
# from GTFSDIR with pd.read_csv(os.path.join(GTFSDIR, 'shapes.txt')).)
shapes = read_csv('shapes.txt')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment