Last active
September 6, 2019 15:14
-
-
Save tomasonjo/5d64fc4e764d79f872d7170465895620 to your computer and use it in GitHub Desktop.
Airport and flights analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=236 | |
Using May 2018 data | |
# Schema | |
CREATE CONSTRAINT ON (a:Airport) ASSERT a.code IS UNIQUE; | |
# Import | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///273590779_T_ONTIME.csv" AS row | |
MERGE (origin:Airport{code:row.ORIGIN_AIRPORT_ID}) | |
ON CREATE SET origin.name = row.ORIGIN | |
MERGE (destination:Airport{code:row.DEST_AIRPORT_ID}) | |
ON CREATE SET destination.name = row.DEST | |
MERGE (origin)-[connection:CONNECTED_TO{ | |
airline:row.UNIQUE_CARRIER, | |
flightNumber: row.FL_NUM, | |
date: row.FL_DATE}]->(destination) | |
# busiest airports | |
## outgoing flights and incoming flights are almost always the same (what goes down must go up, planes constantly rotate between airports) | |
MATCH (a:Airport) | |
RETURN a.name as airport,size((a)-[:CONNECTED_TO]->()) as outgoing_flights, size((a)<-[:CONNECTED_TO]-()) as incoming_flights | |
ORDER by outgoing_flights + incoming_flights DESC LIMIT 10 | |
# Most important airports in the network by pagerank | |
## | |
CALL algo.pageRank.stream('Airport','CONNECTED_TO') | |
YIELD nodeId,score | |
MATCH (n) WHERE id(n)=nodeId | |
RETURN n.name as airport,score order by score desc limit 10 | |
# Most connecting airports in the network according to betweenness centrality | |
CALL algo.betweenness.stream('Airport','CONNECTED_TO') | |
YIELD nodeId,centrality | |
MATCH (n) WHERE id(n)=nodeId | |
RETURN n.name as airport,centrality order by centrality desc limit 10 | |
# Flights grouped by airline | |
MATCH ()-[r:CONNECTED_TO]->() | |
RETURN r.airline as airline_carrier,count(*) as number_of_flights | |
ORDER BY number_of_flights DESC LIMIT 25 | |
# biggest scc component for top 10 airlines | |
## If I start from a specific airport how many different airports I can visit and come back to the starting airport using same airline (Interconnected airports by airline) | |
MATCH ()-[r:CONNECTED_TO]->() | |
WITH r.airline as airline_carrier,count(*) as number_of_flights | |
ORDER BY number_of_flights DESC LIMIT 10 | |
CALL apoc.cypher.run("CALL algo.scc.stream('MATCH (a:Airport) RETURN id(a) as id','MATCH (a1:Airport)-[c:CONNECTED_TO]->(a2:Airport)' +' WHERE c.airline = \"' + airline_carrier + '\" RETURN id(a1) as source,id(a2) as target', {graph:'cypher'}) YIELD nodeId, partition RETURN partition,count(*) as size order by size desc limit 1",{airline_carrier:airline_carrier}) YIELD value | |
RETURN airline_carrier,number_of_flights, value.size as biggest_scc_size | |
# Label propagation for "OO" airline carier | |
## Find airports that form communities within the network for given airline carrier | |
CALL algo.labelPropagation.stream('MATCH (a:Airport) return id(a) as id', | |
'MATCH (a1:Airport)-[r:CONNECTED_TO]->(a2:Airport) WHERE r.airline = "OO" WITH a1,a2,count(*) as weight return id(a1) as source,id(a2) as target, weight as weight',{graph:'cypher'}) YIELD nodeId,label | |
RETURN label,count(*) as size order by size desc limit 10 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment