Loading and Querying GTFS data
//LOAD CSV script for GFTS data | |
create constraint on (a:Agency) assert a.id is unique; | |
create constraint on (r:Route) assert r.id is unique; | |
create constraint on (t:Trip) assert t.id is unique; | |
create index on :Trip(service_id); | |
create constraint on (s:Stop) assert s.id is unique; | |
create index on :Stoptime(stop_sequence); | |
create index on :Stop(name); | |
schema await | |
//add the agency | |
load csv with headers from | |
'file:///nmbs/agency.txt' as csv | |
create (a:Agency {id: toInt(csv.agency_id), name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone}); | |
// add the routes | |
load csv with headers from | |
'file:///nmbs/routes.txt' as csv | |
match (a:Agency {id: toInt(csv.agency_id)}) | |
create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInt(csv.route_type)}); | |
// add the trips | |
load csv with headers from | |
'file:///nmbs/trips.txt' as csv | |
match (r:Route {id: csv.route_id}) | |
merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id}); | |
//add the stops | |
load csv with headers from | |
'file:///nmbs/stops.txt' as csv | |
create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type}); | |
//connect parent/child relationships to stops | |
load csv with headers from | |
'file:///nmbs/stops.txt' as csv | |
with csv | |
where not (csv.parent_station is null) | |
match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id}) | |
create (ps)<-[:PART_OF]-(s); | |
//add the stoptimes | |
load csv with headers from | |
'file:///nmbs/stop_times.txt' as csv | |
match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id}) | |
create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInt(csv.stop_sequence)})-[:LOCATED_AT]->(s); | |
//create integers out of the stoptimes (to allow for calculations/ordering) | |
match (s:Stoptime) | |
set s.arrival_time_int=toInt(replace(s.arrival_time,":",""))/100 | |
set s.departure_time_int=toInt(replace(s.departure_time,":",""))/100 | |
//connect the stoptime sequences | |
match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip), | |
(s2:Stoptime)-[:PART_OF_TRIP]->(t) | |
where s2.stop_sequence=s1.stop_sequence+1 | |
create (s1)-[:PRECEDES]->(s2); |
//Query a GFTS dataset | |
//Find ANTWERP | |
match (s:Stop) | |
where upper(s.name) starts with "ANTW" | |
return s | |
//Find Turnhout part 1 | |
match (s:Stop) | |
where s.name starts with "Turn" | |
return s | |
//Find Turnhout part 2 | |
match (s:Stop) | |
where upper(s.name) starts with "TURN" | |
return s | |
//Find Antwerp and Turnhout | |
// match (s:Stop) | |
// where s.name starts with "Antw" | |
// or s.name starts with "Turn" | |
// return s; | |
match (ant:Stop), (tu:Stop) | |
where ant.name starts with "Antw" | |
AND tu.name starts with "Turn" | |
return distinct tu,ant; | |
// match (ant:Stop) | |
// where ant.name starts with "Antw" | |
// return ant.name as name | |
// union | |
// match (tu:Stop) | |
// where tu.name starts with "Turn" | |
// return tu.name as name; | |
//find the links between the top-level stops | |
match (t:Stop)<-[:PART_OF]-(:Stop), | |
(a:Stop)<-[:PART_OF]-(:Stop) | |
where t.name starts with "Turn" | |
AND a.name="Antwerpen-Centraal" | |
with t,a | |
match p = allshortestpaths((t)-[*]-(a)) | |
return p | |
limit 10; | |
match (t:Stop),(a:Stop) | |
where t.name starts with "Turn" | |
AND a.name="Antwerpen-Centraal" | |
with t,a | |
match p = allshortestpaths((t)-[*]-(a)) | |
return p | |
limit 10 | |
//range queries with and without indexes | |
match (st:Stoptime) | |
where st.departure_time < "07:45:00" | |
return st.departure_time; | |
//find a DIRECT route with range conditions | |
match (tu:Stop {name: "Turnhout"})--(tu_st:Stoptime) | |
where tu_st.departure_time > "07:00:00" | |
AND tu_st.departure_time < "09:00:00" | |
with tu, tu_st | |
match (ant:Stop {name:"Antwerpen-Centraal"})--(ant_st:Stoptime) | |
where ant_st.arrival_time < "09:00:00" | |
AND ant_st.arrival_time > "07:00:00" | |
and ant_st.arrival_time > tu_st.departure_time | |
with ant,ant_st,tu, tu_st | |
match p = allshortestpaths((tu_st)-[*]->(ant_st)) | |
with nodes(p) as n | |
unwind n as nodes | |
match (nodes)-[r]-() | |
return nodes,r | |
//Indirect routes | |
//find the route and the stops for the indirect route | |
match (t:Stop),(a:Stop) | |
where t.name = "Turnhout" | |
AND a.name="Arlon" | |
with t,a | |
match p = allshortestpaths((t)-[*]-(a)) | |
where NONE (x in relationships(p) where type(x)="OPERATES") | |
return p | |
limit 10 | |
//plan a specific indirect route | |
MATCH (tu:Stop {name:"Turnhout"})--(st_tu:Stoptime), | |
(ar:Stop {name:"Arlon"})--(st_ar:Stoptime), | |
p1=((st_tu)-[:PRECEDES*]->(st_midway_arr:Stoptime)), | |
(st_midway_arr)--(midway:Stop), | |
(midway)--(st_midway_dep:Stoptime), | |
p2=((st_midway_dep)-[:PRECEDES*]->(st_ar)) | |
WHERE | |
st_tu.departure_time > "08:00:00" | |
AND st_tu.departure_time < "11:00:00" | |
AND st_midway_arr.arrival_time > st_tu.departure_time | |
AND st_midway_dep.departure_time > st_midway_arr.arrival_time | |
AND st_ar.arrival_time > st_midway_dep.departure_time | |
RETURN | |
tu,st_tu,ar,st_ar,p1,p2,midway | |
order by (st_ar.arrival_time_int-st_tu.departure_time_int) ASC | |
limit 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Thanks Bruggen,
I read in Bruggen blog about: Loading General Transport Feed Spec (GTFS) files into Neo4j.
It was so helpful.
I'm looking on how can i write a query which get a spanning tree for all the stop's which in a given range of departure_time and arrival_time, without filtering on a specific stop.
perhaps next step i want to run a query which give answer about transportation acceptability between two given areas.
Do you have any suggestion how to do it?