# kpwebb/gtfs_ingest.py

## gtfs_ingest.py
# converts GTFS feed to a CSV sequence of "trip_id,time,lat,lon[,TIMEPOINT]"

import datetime
import partridge as ptg
import numpy

# Epoch seconds for 2018-03-26 00:00:00 UTC; added to every stop time on output.
day_start = 1522022400

# Location of the GTFS archive to convert.
path = '/path/to/gtfs.zip'

representative_feed = ptg.get_representative_feed(path)

# Index each row of the stops table by its first column (presumably stop_id).
stop_dict = {stop[0]: stop for stop in representative_feed.stops.values}

# Group the stop_times rows by their first column (the trip id that is later
# printed), preserving the order in which the feed lists them.
trip_stop_times = {}
for stop_time in representative_feed.stop_times.values:
    trip_stop_times.setdefault(stop_time[0], []).append(stop_time)


def print_stop_time(data, stop_time, timepoint):
    """Print one CSV row of the form ``trip_id,time,lat,lon[,TIMEPOINT]``.

    data: a stop_times row. Column 0 is printed as the first field and
        column 3 is the key used to look the stop up in ``stop_dict``.
    stop_time: offset that is added to ``day_start``; the sum is multiplied
        by 1000 and rounded (presumably seconds -> epoch milliseconds).
    timepoint: when truthy, a trailing ``TIMEPOINT`` field is appended.

    Raises KeyError if the row's stop key is not present in ``stop_dict``.
    """
    stop = stop_dict[data[3]]

    # round() delegates to the operand's __round__, so with a NumPy scalar the
    # result may be a float (version dependent) and would print as "...0.0".
    # int() guarantees the time field is always plain integer text.
    millis = int(round((stop_time + day_start) * 1000))

    # Columns 4 and 5 of the stop row are emitted as the lat/lon fields.
    output_data = [data[0], str(millis), str(stop[4]), str(stop[5])]
    if timepoint:
        output_data.append("TIMEPOINT")

    print(",".join(output_data))


# Emit every trip's stops in order. Rows whose column 1 is NaN have no time of
# their own; they are held back until the next timed row and then spread
# evenly between the previous timed row and that one.
for trip_rows in trip_stop_times.values():
    # Order the rows by column 4 (presumably stop_sequence).
    ordered_rows = sorted(trip_rows, key=lambda row: row[4])

    trip_last_timepoint = 0
    pending = []
    for stop_time in ordered_rows:
        current_time = stop_time[1]

        if numpy.isnan(current_time):
            # No time on this row: queue it until a timed row shows up.
            pending.append(stop_time)
            continue

        if pending:
            # Equal steps between the last timed row and this one.
            step = (current_time - trip_last_timepoint) / (len(pending) + 1)
            for offset, untimed in enumerate(pending, 1):
                print_stop_time(untimed, trip_last_timepoint + (step * offset), False)
            pending = []

        trip_last_timepoint = current_time
        print_stop_time(stop_time, current_time, True)
    # NOTE: rows still pending when a trip ends are never printed.
	# converts GTFS feed to a CSV sequence of "trip_id,time,lat,lon[,TIMEPOINT]"

	import datetime
	import partridge as ptg
	import numpy

	# NOTE(review): this section repeats the setup that already appears earlier
	# in the file; its nesting had been flattened and is restored here. Confirm
	# whether the duplicate is intended.

	# Epoch seconds for 2018-03-26 00:00:00 UTC; added to every stop time on output.
	day_start = 1522022400

	# Location of the GTFS archive to convert.
	path = '/path/to/gtfs.zip'

	representative_feed = ptg.get_representative_feed(path)

	# Index each row of the stops table by its first column (presumably stop_id).
	stop_dict = {}

	for stop in representative_feed.stops.values:
		stop_dict[stop[0]] = stop

	# Group the stop_times rows by their first column, keeping feed order.
	trip_stop_times = {}

	for stop_time in representative_feed.stop_times.values:

		if stop_time[0] not in trip_stop_times:
			trip_stop_times[stop_time[0]] = []

		trip_stop_times[stop_time[0]].append(stop_time)


	def print_stop_time(data, stop_time, timepoint):
		"""Print one CSV row of the form ``trip_id,time,lat,lon[,TIMEPOINT]``.

		data: a stop_times row. Column 0 is printed as the first field and
			column 3 is the key used to look the stop up in ``stop_dict``.
		stop_time: offset that is added to ``day_start``; the sum is
			multiplied by 1000 and rounded (presumably seconds -> epoch ms).
		timepoint: when truthy, a trailing ``TIMEPOINT`` field is appended.

		Raises KeyError if the row's stop key is not present in ``stop_dict``.
		"""
		stop = stop_dict[data[3]]

		# round() on a NumPy scalar may return a float (version dependent);
		# int() guarantees the time field is always plain integer text.
		millis = int(round((stop_time + day_start) * 1000))

		# Columns 4 and 5 of the stop row are emitted as the lat/lon fields.
		output_data = [data[0], str(millis), str(stop[4]), str(stop[5])]

		if timepoint:
			output_data.append("TIMEPOINT")

		print(",".join(output_data))



	# Emit every trip's stops in order. Rows whose column 1 is NaN carry no
	# time; they are queued and, at the next timed row, spread evenly between
	# the previous timed row and that one.
	for trip_id in trip_stop_times:
		trip_stops = trip_stop_times[trip_id]
		# Order the rows by column 4 (presumably stop_sequence).
		sorted_trip_stop_times = sorted(trip_stops, key=lambda k: k[4])

		trip_last_timepoint = 0
		non_timepoint_stops = []
		for stop_time in sorted_trip_stop_times:

			if not numpy.isnan(stop_time[1]):
				current_time = stop_time[1]

				if len(non_timepoint_stops) > 0:
					# Equal steps between the last timed row and this one.
					interpolated_time = (current_time - trip_last_timepoint) / (len(non_timepoint_stops) + 1)
					offset = 1
					for ntp_stop in non_timepoint_stops:
						print_stop_time(ntp_stop, trip_last_timepoint + (interpolated_time * offset), False)
						offset += 1
					non_timepoint_stops = []

				trip_last_timepoint = current_time
				print_stop_time(stop_time, stop_time[1], True)
			else:
				# No time on this row: hold it until a timed row shows up.
				# Rows still queued when the trip ends are never printed.
				non_timepoint_stops.append(stop_time)