Load GTFS data into Neo4j
//LOAD CSV script for GTFS data
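// create uniqueness constraints and indexes up front so the MATCH/MERGE lookups during import stay fast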
create constraint on (a:Agency) assert a.id is unique;
create constraint on (r:Route) assert r.id is unique;
create constraint on (t:Trip) assert t.id is unique;
create index on :Trip(service_id);
create constraint on (s:Stop) assert s.id is unique;
create index on :Stoptime(stop_sequence);
create index on :Stop(name);
//add the agency
USING PERIODIC COMMIT 500
load csv with headers from
'file:///agency.txt' as csv
create (a:Agency {id: csv.agency_id, name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone});
// add the routes
USING PERIODIC COMMIT 500
load csv with headers from
'file:///routes.txt' as csv
match (a:Agency {id: csv.agency_id})
create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInteger(csv.route_type)});
// add the trips
USING PERIODIC COMMIT 500
load csv with headers from
'file:///trips.txt' as csv
WITH csv
match (r:Route {id: csv.route_id})
merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id, direction_id: csv.direction_id});
//add the stops
USING PERIODIC COMMIT 500
load csv with headers from
'file:///stops.txt' as csv
create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type, wheelchair_boarding: csv.wheelchair_boarding});
//connect parent/child relationships to stops
USING PERIODIC COMMIT 500
load csv with headers from
'file:///stops.txt' as csv
with csv
where csv.parent_station is not null
match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id})
create (ps)<-[:PART_OF]-(s);
//add the stoptimes
USING PERIODIC COMMIT 500
load csv with headers from
'file:///stop_times.txt' as csv
match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id})
create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInteger(csv.stop_sequence)})-[:LOCATED_AT]->(s);
//create integers out of the stoptimes (to allow for calculations/ordering)
// run this statement until you get back a result of 0.
MATCH (s:Stoptime)
WHERE NOT EXISTS (s.arrival_time_int)
WITH s LIMIT 500000
set s.arrival_time_int=toInteger(replace(s.arrival_time,":","")),
s.departure_time_int=toInteger(replace(s.departure_time,":",""))
RETURN count(s);
//connect the stoptime sequences
match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip),
(s2:Stoptime)-[:PART_OF_TRIP]->(t)
where s2.stop_sequence=s1.stop_sequence+1
create (s1)-[:PRECEDES]->(s2);
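// optional sanity check (not part of the import itself): a query along these lines
// lists the first few stop names of some trips in sequence order, which is a quick way
// to confirm the Trip, Stoptime and Stop nodes were wired up. Trip ids and stop names
// depend entirely on your GTFS feed.
match (t:Trip)<-[:PART_OF_TRIP]-(st:Stoptime)-[:LOCATED_AT]->(s:Stop)
with t, st, s
order by st.stop_sequence
with t, collect(s.name)[..5] as first_stops
return t.id as trip, first_stops
limit 10;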