Skip to content

Instantly share code, notes, and snippets.

@tomasonjo
Last active September 6, 2019 15:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomasonjo/5d64fc4e764d79f872d7170465895620 to your computer and use it in GitHub Desktop.
Save tomasonjo/5d64fc4e764d79f872d7170465895620 to your computer and use it in GitHub Desktop.
Airport and flights analysis
https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=236
Using May 2018 data
# Schema
CREATE CONSTRAINT ON (a:Airport) ASSERT a.code IS UNIQUE;
# Import
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///273590779_T_ONTIME.csv" AS row
MERGE (origin:Airport{code:row.ORIGIN_AIRPORT_ID})
ON CREATE SET origin.name = row.ORIGIN
MERGE (destination:Airport{code:row.DEST_AIRPORT_ID})
ON CREATE SET destination.name = row.DEST
MERGE (origin)-[connection:CONNECTED_TO{
airline:row.UNIQUE_CARRIER,
flightNumber: row.FL_NUM,
date: row.FL_DATE}]->(destination)
# busiest airports
## outgoing flights and incoming flights are almost always the same (what goes down must go up, planes constantly rotate between airports)
MATCH (a:Airport)
RETURN a.name as airport,size((a)-[:CONNECTED_TO]->()) as outgoing_flights, size((a)<-[:CONNECTED_TO]-()) as incoming_flights
ORDER by outgoing_flights + incoming_flights DESC LIMIT 10
# Most important airports in the network by pagerank
##
CALL algo.pageRank.stream('Airport','CONNECTED_TO')
YIELD nodeId,score
MATCH (n) WHERE id(n)=nodeId
RETURN n.name as airport,score order by score desc limit 10
# Most connecting airports in the network according to betweenness centrality
CALL algo.betweenness.stream('Airport','CONNECTED_TO')
YIELD nodeId,centrality
MATCH (n) WHERE id(n)=nodeId
RETURN n.name as airport,centrality order by centrality desc limit 10
# Flights grouped by airline
MATCH ()-[r:CONNECTED_TO]->()
RETURN r.airline as airline_carrier,count(*) as number_of_flights
ORDER BY number_of_flights DESC LIMIT 25
# biggest scc component for top 10 airlines
## If I start from a specific airport how many different airports I can visit and come back to the starting airport using same airline (Interconnected airports by airline)
MATCH ()-[r:CONNECTED_TO]->()
WITH r.airline as airline_carrier,count(*) as number_of_flights
ORDER BY number_of_flights DESC LIMIT 10
CALL apoc.cypher.run("CALL algo.scc.stream('MATCH (a:Airport) RETURN id(a) as id','MATCH (a1:Airport)-[c:CONNECTED_TO]->(a2:Airport)' +' WHERE c.airline = \"' + airline_carrier + '\" RETURN id(a1) as source,id(a2) as target', {graph:'cypher'}) YIELD nodeId, partition RETURN partition,count(*) as size order by size desc limit 1",{airline_carrier:airline_carrier}) YIELD value
RETURN airline_carrier,number_of_flights, value.size as biggest_scc_size
# Label propagation for "OO" airline carier
## Find airports that form communities within the network for given airline carrier
CALL algo.labelPropagation.stream('MATCH (a:Airport) return id(a) as id',
'MATCH (a1:Airport)-[r:CONNECTED_TO]->(a2:Airport) WHERE r.airline = "OO" WITH a1,a2,count(*) as weight return id(a1) as source,id(a2) as target, weight as weight',{graph:'cypher'}) YIELD nodeId,label
RETURN label,count(*) as size order by size desc limit 10
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment