Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?

The World Cup Graph

Initial Data Setup

CREATE INDEX ON :Match(id);
CREATE INDEX ON :WorldCup(name);
CREATE INDEX ON :Stadium(name);
CREATE INDEX ON :Phase(phase);
CREATE INDEX ON :Country(name);
CREATE INDEX ON :Time(time);
CREATE INDEX ON :MatchNumber(value);
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "https://dl.dropboxusercontent.com/u/7619809/matches.csv" AS csvLine

WITH csvLine, toInt(csvLine.match_number) AS matchNumber

WITH csvLine,
     CASE WHEN csvLine.phase = ""
     THEN
     	CASE WHEN matchNumber <= 48 THEN "Group matches"
     	     WHEN matchNumber > 48 AND matchNumber <= 56 THEN "Round of 16"
     	     WHEN matchNumber > 56 AND matchNumber <= 60 THEN "Quarter-finals"
     	     WHEN matchNumber > 60 AND matchNumber <= 62 THEN "Semi-finals"
     	     WHEN matchNumber = 63 THEN "Third place"
     		 ELSE "Final"
     	END
     ELSE
     	csvLine.phase
	END AS phase, matchNumber

MERGE (match:Match {id: csvLine.id})
SET match.h_score = csvLine.h_score,
    match.a_score = csvLine.a_score,
    match.attendance = csvLine.attendance,
    match.date = csvLine.date,
    match.description = csvLine.home + " vs. " + csvLine.away

MERGE (host:Country {name: csvLine.host})

MERGE (home:Country {name: csvLine.home})
MERGE (match)-[:HOME_TEAM]->(home)

MERGE (away:Country {name: csvLine.away})
MERGE (match)-[:AWAY_TEAM]->(away)

MERGE (year:Year {year: toInt(csvLine.year)})

MERGE (worldCup:WorldCup {name: csvLine.world_cup})
MERGE (match)<-[:CONTAINS_MATCH]-(worldCup)
MERGE (host)<-[:HOSTED_BY]-(worldCup)
MERGE (year)<-[:IN_YEAR]-(worldCup)

MERGE (stadium:Stadium {name: csvLine.stadium})
MERGE (match)-[:PLAYED_IN_STADIUM]->(stadium)

MERGE (p:Phase {name: phase})
MERGE (match)-[:IN_PHASE]->(p)

MERGE (mn:MatchNumber {value: matchNumber})
MERGE (match)-[:HAS_MATCH_NUMBER]->(mn)

MERGE (time:Time {time: csvLine.time})
MERGE (match)-[:PLAYED_AT_TIME]->(time)

RETURN count(*) as matches;

The graph is too large to visualize nicely, that’s why here only a few matches from 2010 as example.

MATCH (match)<-[:CONTAINS_MATCH]-(worldCup)-[:HOSTED_BY]-(host),
      (worldCup)-[:IN_YEAR]-(year:Year {year:2010})

RETURN *
LIMIT 10

Which Worldcups are in this dataset?

MATCH (stadium:Stadium)<-[:PLAYED_IN_STADIUM]-()<-[:CONTAINS_MATCH]-(worldCup:WorldCup)-[:HOSTED_BY]-(host:Country),
      (worldCup)-[:IN_YEAR]-(year:Year)

RETURN worldCup.name,year.year,host.name,collect(distinct stadium.name)
ORDER BY year.year ASC

Find the stadiums that hosted the most World Cup matches and which World Cups those were in.

MATCH (stadium:Stadium)<-[:PLAYED_IN_STADIUM]-()<-[:CONTAINS_MATCH]-(wc)-[:HOSTED_BY]-(host),
      (wc)-[:IN_YEAR]-(year)

WITH stadium, host, COUNT(*) as count, COLLECT(DISTINCT year.year) AS years

UNWIND years as year

WITH stadium, host, count, year
ORDER BY stadium.name, host.name, year

RETURN stadium.name, host.name, COLLECT(year) AS years, count
ORDER BY count DESC
LIMIT 5

Which countries hosted the most World Cups and when?

MATCH (host:Country)<-[:HOSTED_BY]-()-[:IN_YEAR]->(year)

WITH host, COUNT(*) AS times, COLLECT(year.year) AS years
UNWIND years AS year

WITH host, times, year
ORDER BY times DESC, year

RETURN host.name, times, COLLECT(year) AS years
ORDER BY times DESC
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment