rvanbruggen/loadgtfs.cql

## loadgtfs.cql
//LOAD CSV script for GFTS data
create constraint on (a:Agency) assert a.id is unique;
create constraint on (r:Route) assert r.id is unique;
create constraint on (t:Trip) assert t.id is unique;
create index on :Trip(service_id);
create constraint on (s:Stop) assert s.id is unique;
create index on :Stoptime(stop_sequence);
create index on :Stop(name);

schema await

//add the agency
load csv with headers from
'file:///nmbs/agency.txt' as csv
create (a:Agency {id: toInt(csv.agency_id), name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone});

// add the routes
load csv with headers from
'file:///nmbs/routes.txt' as csv
match (a:Agency {id: toInt(csv.agency_id)})
create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInt(csv.route_type)});

// add the trips
load csv with headers from
'file:///nmbs/trips.txt' as csv
match (r:Route {id: csv.route_id})
merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id});

//add the stops
load csv with headers from
'file:///nmbs/stops.txt' as csv
create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type});

//connect parent/child relationships to stops
load csv with headers from
'file:///nmbs/stops.txt' as csv
with csv
where not (csv.parent_station is null)
match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id})
create (ps)<-[:PART_OF]-(s);

//add the stoptimes
load csv with headers from
'file:///nmbs/stop_times.txt' as csv
match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id})
create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInt(csv.stop_sequence)})-[:LOCATED_AT]->(s);

//create integers out of the stoptimes (to allow for calculations/ordering)
match (s:Stoptime)
set s.arrival_time_int=toInt(replace(s.arrival_time,":",""))/100
set s.departure_time_int=toInt(replace(s.departure_time,":",""))/100


//connect the stoptime sequences
match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip),
(s2:Stoptime)-[:PART_OF_TRIP]->(t)
where s2.stop_sequence=s1.stop_sequence+1
create (s1)-[:PRECEDES]->(s2);

## querybelgianrailgtfs.cql
//Query a GFTS dataset

//Find ANTWERP
match (s:Stop)
where upper(s.name) starts with "ANTW"
return s

//Find Turnhout part 1
match (s:Stop)
where s.name starts with "Turn"
return s

//Find Turnhout part 2
match (s:Stop)
where upper(s.name) starts with "TURN"
return s


//Find Antwerp and Turnhout
// match (s:Stop)
// where s.name starts with "Antw"
// or s.name starts with "Turn"
// return s;

match (ant:Stop), (tu:Stop)
where ant.name starts with "Antw"
AND tu.name starts with "Turn"
return distinct tu,ant;

// match (ant:Stop)
// where ant.name starts with "Antw"
// return ant.name as name
// union
// match (tu:Stop)
// where tu.name starts with "Turn"
// return tu.name as name;

//find the links between the top-level stops
match (t:Stop)<-[:PART_OF]-(:Stop),
(a:Stop)<-[:PART_OF]-(:Stop)
where t.name starts with "Turn"
AND a.name="Antwerpen-Centraal"
with t,a
match p = allshortestpaths((t)-[*]-(a))
return p
limit 10;

match (t:Stop),(a:Stop)
where t.name starts with "Turn"
AND a.name="Antwerpen-Centraal"
with t,a
match p = allshortestpaths((t)-[*]-(a))
return p
limit 10


//range queries with and without indexes
match (st:Stoptime)
where st.departure_time < "07:45:00"
return st.departure_time;

//find a DIRECT route with range conditions
match (tu:Stop {name: "Turnhout"})--(tu_st:Stoptime)
where tu_st.departure_time > "07:00:00"
AND tu_st.departure_time < "09:00:00"
with tu, tu_st
match (ant:Stop {name:"Antwerpen-Centraal"})--(ant_st:Stoptime)
where ant_st.arrival_time < "09:00:00"
AND ant_st.arrival_time > "07:00:00"
and ant_st.arrival_time > tu_st.departure_time
with ant,ant_st,tu, tu_st
match p = allshortestpaths((tu_st)-[*]->(ant_st))
with nodes(p) as n
unwind n as nodes
match (nodes)-[r]-()
return nodes,r

//Indirect routes

//find the route and the stops for the indirect route
match (t:Stop),(a:Stop)
where t.name = "Turnhout"
AND a.name="Arlon"
with t,a
match p = allshortestpaths((t)-[*]-(a))
where NONE (x in relationships(p) where type(x)="OPERATES")
return p
limit 10

//plan a specific indirect route
MATCH (tu:Stop {name:"Turnhout"})--(st_tu:Stoptime),
(ar:Stop {name:"Arlon"})--(st_ar:Stoptime),
p1=((st_tu)-[:PRECEDES*]->(st_midway_arr:Stoptime)),
(st_midway_arr)--(midway:Stop),
(midway)--(st_midway_dep:Stoptime),
p2=((st_midway_dep)-[:PRECEDES*]->(st_ar))
WHERE
st_tu.departure_time > "08:00:00"
AND st_tu.departure_time < "11:00:00"
AND st_midway_arr.arrival_time > st_tu.departure_time
AND st_midway_dep.departure_time > st_midway_arr.arrival_time
AND st_ar.arrival_time > st_midway_dep.departure_time
RETURN
tu,st_tu,ar,st_ar,p1,p2,midway
order by (st_ar.arrival_time_int-st_tu.departure_time_int) ASC
limit 1
	//LOAD CSV script for GFTS data
	create constraint on (a:Agency) assert a.id is unique;
	create constraint on (r:Route) assert r.id is unique;
	create constraint on (t:Trip) assert t.id is unique;
	create index on :Trip(service_id);
	create constraint on (s:Stop) assert s.id is unique;
	create index on :Stoptime(stop_sequence);
	create index on :Stop(name);

	schema await

	//add the agency
	load csv with headers from
	'file:///nmbs/agency.txt' as csv
	create (a:Agency {id: toInt(csv.agency_id), name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone});

	// add the routes
	load csv with headers from
	'file:///nmbs/routes.txt' as csv
	match (a:Agency {id: toInt(csv.agency_id)})
	create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInt(csv.route_type)});

	// add the trips
	load csv with headers from
	'file:///nmbs/trips.txt' as csv
	match (r:Route {id: csv.route_id})
	merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id});

	//add the stops
	load csv with headers from
	'file:///nmbs/stops.txt' as csv
	create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type});

	//connect parent/child relationships to stops
	load csv with headers from
	'file:///nmbs/stops.txt' as csv
	with csv
	where not (csv.parent_station is null)
	match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id})
	create (ps)<-[:PART_OF]-(s);

	//add the stoptimes
	load csv with headers from
	'file:///nmbs/stop_times.txt' as csv
	match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id})
	create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInt(csv.stop_sequence)})-[:LOCATED_AT]->(s);

	//create integers out of the stoptimes (to allow for calculations/ordering)
	match (s:Stoptime)
	set s.arrival_time_int=toInt(replace(s.arrival_time,":",""))/100
	set s.departure_time_int=toInt(replace(s.departure_time,":",""))/100


	//connect the stoptime sequences
	match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip),
	(s2:Stoptime)-[:PART_OF_TRIP]->(t)
	where s2.stop_sequence=s1.stop_sequence+1
	create (s1)-[:PRECEDES]->(s2);
	//Query a GFTS dataset

	//Find ANTWERP
	match (s:Stop)
	where upper(s.name) starts with "ANTW"
	return s

	//Find Turnhout part 1
	match (s:Stop)
	where s.name starts with "Turn"
	return s

	//Find Turnhout part 2
	match (s:Stop)
	where upper(s.name) starts with "TURN"
	return s


	//Find Antwerp and Turnhout
	// match (s:Stop)
	// where s.name starts with "Antw"
	// or s.name starts with "Turn"
	// return s;

	match (ant:Stop), (tu:Stop)
	where ant.name starts with "Antw"
	AND tu.name starts with "Turn"
	return distinct tu,ant;

	// match (ant:Stop)
	// where ant.name starts with "Antw"
	// return ant.name as name
	// union
	// match (tu:Stop)
	// where tu.name starts with "Turn"
	// return tu.name as name;

	//find the links between the top-level stops
	match (t:Stop)<-[:PART_OF]-(:Stop),
	(a:Stop)<-[:PART_OF]-(:Stop)
	where t.name starts with "Turn"
	AND a.name="Antwerpen-Centraal"
	with t,a
	match p = allshortestpaths((t)-[*]-(a))
	return p
	limit 10;

	match (t:Stop),(a:Stop)
	where t.name starts with "Turn"
	AND a.name="Antwerpen-Centraal"
	with t,a
	match p = allshortestpaths((t)-[*]-(a))
	return p
	limit 10


	//range queries with and without indexes
	match (st:Stoptime)
	where st.departure_time < "07:45:00"
	return st.departure_time;

	//find a DIRECT route with range conditions
	match (tu:Stop {name: "Turnhout"})--(tu_st:Stoptime)
	where tu_st.departure_time > "07:00:00"
	AND tu_st.departure_time < "09:00:00"
	with tu, tu_st
	match (ant:Stop {name:"Antwerpen-Centraal"})--(ant_st:Stoptime)
	where ant_st.arrival_time < "09:00:00"
	AND ant_st.arrival_time > "07:00:00"
	and ant_st.arrival_time > tu_st.departure_time
	with ant,ant_st,tu, tu_st
	match p = allshortestpaths((tu_st)-[*]->(ant_st))
	with nodes(p) as n
	unwind n as nodes
	match (nodes)-[r]-()
	return nodes,r

	//Indirect routes

	//find the route and the stops for the indirect route
	match (t:Stop),(a:Stop)
	where t.name = "Turnhout"
	AND a.name="Arlon"
	with t,a
	match p = allshortestpaths((t)-[*]-(a))
	where NONE (x in relationships(p) where type(x)="OPERATES")
	return p
	limit 10

	//plan a specific indirect route
	MATCH (tu:Stop {name:"Turnhout"})--(st_tu:Stoptime),
	(ar:Stop {name:"Arlon"})--(st_ar:Stoptime),
	p1=((st_tu)-[:PRECEDES*]->(st_midway_arr:Stoptime)),
	(st_midway_arr)--(midway:Stop),
	(midway)--(st_midway_dep:Stoptime),
	p2=((st_midway_dep)-[:PRECEDES*]->(st_ar))
	WHERE
	st_tu.departure_time > "08:00:00"
	AND st_tu.departure_time < "11:00:00"
	AND st_midway_arr.arrival_time > st_tu.departure_time
	AND st_midway_dep.departure_time > st_midway_arr.arrival_time
	AND st_ar.arrival_time > st_midway_dep.departure_time
	RETURN
	tu,st_tu,ar,st_ar,p1,p2,midway
	order by (st_ar.arrival_time_int-st_tu.departure_time_int) ASC
	limit 1