Skip to content

Instantly share code, notes, and snippets.

@tekiegirl
Last active August 29, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tekiegirl/07a1abbd47df81c37b9f to your computer and use it in GitHub Desktop.
Save tekiegirl/07a1abbd47df81c37b9f to your computer and use it in GitHub Desktop.
Baby Names Graph Gist
== Baby Names Graph
:neo4j-version: neo4j-2.1
:author: Jacqui Read
:twitter: @tekiegirl
=== Initial Data Setup
//setup
//hide
[source,cypher]
----
MERGE (y13:Year {year: 2013})
MERGE (y12:Year {year: 2012})
MERGE (y03:Year {year: 2003})
----
//setup
//hide
[source,cypher]
----
CREATE INDEX ON :Name(name);
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "https://dl.dropboxusercontent.com/u/2900504/babynames.csv" AS csvLine
WITH csvLine
MERGE (name:Name {name: csvLine.Name})
WITH name, csvLine
MERGE (gen:Gender {name: csvLine.Gender})
WITH name, csvLine, gen
MATCH (y:Year {year: 2013})
MERGE (name)-[:RANKED {rank: toInt(csvLine.Rank), count: toInt(csvLine.Count)}]->(y)
MERGE (name)-[:OF_GENDER]->(gen)
WITH name, csvLine
MATCH (y12:Year {year: 2012})
MERGE (name)-[:RANKED {rank: toInt(csvLine.Rank2012)}]->(y12)
WITH name, csvLine
MATCH (y03:Year {year: 2003})
MERGE (name)-[:RANKED {rank: toInt(csvLine.Rank2003)}]->(y03)
RETURN count(*) as All_Names;
----
//table
There are 201 names, as rank 100 for girls in 2013 is held by two names (hence 101 girl's names, but 100 boy's names).
=== What are the top 10 boy's names of 2013?
[source,cypher]
----
MATCH (y:Year {year: 2013})<-[r:RANKED]-(n:Name)-[OF_GENDER]->(g:Gender {name: 'Boy'})
WHERE r.rank <= 10
RETURN n.name AS Name, r.rank AS Rank
ORDER BY r.rank
----
//table
=== What are the top 10 girl's names of 2013?
[source,cypher]
----
MATCH (y:Year {year: 2013})<-[r:RANKED]-(n:Name)-[OF_GENDER]->(g:Gender {name: 'Girl'})
WHERE r.rank <= 10
RETURN n.name AS Name, r.rank AS Rank
ORDER BY r.rank
----
//table
=== Which names have gone up the most since 2012?
This does not take into account names that are not ranked in both years.
[source,cypher]
----
MATCH (:Year {year: 2013})<-[r13:RANKED]-(n:Name)-[r12:RANKED]->(:Year {year: 2012})
WHERE r13.rank < r12.rank
RETURN n.name AS Name, r13.rank AS Rank_2013, r12.rank AS Rank_2012, (r13.rank - r12.rank) * -1 AS Gain
ORDER BY Gain DESC
LIMIT 10
----
//table
=== Which names have gone up the most since 2003?
This does not take into account names that are not ranked in both years.
[source,cypher]
----
MATCH (:Year {year: 2013})<-[r13:RANKED]-(n:Name)-[r03:RANKED]->(:Year {year: 2003})
WHERE r13.rank < r03.rank
RETURN n.name AS Name, r13.rank AS Rank_2013, r03.rank AS Rank_2003, (r13.rank - r03.rank) * -1 AS Gain
ORDER BY Gain DESC
LIMIT 10
----
//table
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment