marc0x71/load_gtfs.cql

## load_gtfs.cql
//LOAD CSV script for GFTS data
create constraint on (a:Agency) assert a.id is unique;
create constraint on (r:Route) assert r.id is unique;
create constraint on (t:Trip) assert t.id is unique;
create index on :Trip(service_id);
create constraint on (s:Stop) assert s.id is unique;
create index on :Stoptime(stop_sequence);
create index on :Stop(name);

//add the agency
USING PERIODIC COMMIT 500
load csv with headers from
'file:///agency.txt' as csv
create (a:Agency {id: csv.agency_id, name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone});

// add the routes
USING PERIODIC COMMIT 500
load csv with headers from
'file:///routes.txt' as csv
match (a:Agency {id: csv.agency_id})
create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInt(csv.route_type)});

// add the trips
USING PERIODIC COMMIT 500
load csv with headers from
'file:///trips.txt' as csv
WITH csv
match (r:Route {id: csv.route_id})
merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id, direction_id: csv.direction_id });

//add the stops
USING PERIODIC COMMIT 500
load csv with headers from
'file:///stops.txt' as csv
create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type, wheelchair_boarding: csv.wheelchair_boarding });

//connect parent/child relationships to stops
USING PERIODIC COMMIT 500
load csv with headers from
'file:///stops.txt' as csv
with csv
where not (csv.parent_station is null)
match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id})
create (ps)<-[:PART_OF]-(s);

//add the stoptimes
USING PERIODIC COMMIT 500
load csv with headers from
'file:///stop_times.txt' as csv
match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id})
create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInt(csv.stop_sequence)})-[:LOCATED_AT]->(s);

//create integers out of the stoptimes (to allow for calculations/ordering)
// run this statement until you get back a result of 0.
MATCH (s:Stoptime)
WHERE NOT EXISTS (s.arrival_time_int)
WITH s LIMIT 500000
set s.arrival_time_int=toInteger(replace(s.arrival_time,":","")),
s.departure_time_int=toInteger(replace(s.departure_time,":",""))
RETURN count(s);

//connect the stoptime sequences
match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip),
(s2:Stoptime)-[:PART_OF_TRIP]->(t)
where s2.stop_sequence=s1.stop_sequence+1
create (s1)-[:PRECEDES]->(s2);
	//LOAD CSV script for GFTS data
	create constraint on (a:Agency) assert a.id is unique;
	create constraint on (r:Route) assert r.id is unique;
	create constraint on (t:Trip) assert t.id is unique;
	create index on :Trip(service_id);
	create constraint on (s:Stop) assert s.id is unique;
	create index on :Stoptime(stop_sequence);
	create index on :Stop(name);

	//add the agency
	USING PERIODIC COMMIT 500
	load csv with headers from
	'file:///agency.txt' as csv
	create (a:Agency {id: csv.agency_id, name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone});

	// add the routes
	USING PERIODIC COMMIT 500
	load csv with headers from
	'file:///routes.txt' as csv
	match (a:Agency {id: csv.agency_id})
	create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInt(csv.route_type)});

	// add the trips
	USING PERIODIC COMMIT 500
	load csv with headers from
	'file:///trips.txt' as csv
	WITH csv
	match (r:Route {id: csv.route_id})
	merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id, direction_id: csv.direction_id });

	//add the stops
	USING PERIODIC COMMIT 500
	load csv with headers from
	'file:///stops.txt' as csv
	create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type, wheelchair_boarding: csv.wheelchair_boarding });

	//connect parent/child relationships to stops
	USING PERIODIC COMMIT 500
	load csv with headers from
	'file:///stops.txt' as csv
	with csv
	where not (csv.parent_station is null)
	match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id})
	create (ps)<-[:PART_OF]-(s);

	//add the stoptimes
	USING PERIODIC COMMIT 500
	load csv with headers from
	'file:///stop_times.txt' as csv
	match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id})
	create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInt(csv.stop_sequence)})-[:LOCATED_AT]->(s);

	//create integers out of the stoptimes (to allow for calculations/ordering)
	// run this statement until you get back a result of 0.
	MATCH (s:Stoptime)
	WHERE NOT EXISTS (s.arrival_time_int)
	WITH s LIMIT 500000
	set s.arrival_time_int=toInteger(replace(s.arrival_time,":","")),
	s.departure_time_int=toInteger(replace(s.departure_time,":",""))
	RETURN count(s);

	//connect the stoptime sequences
	match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip),
	(s2:Stoptime)-[:PART_OF_TRIP]->(t)
	where s2.stop_sequence=s1.stop_sequence+1
	create (s1)-[:PRECEDES]->(s2);