rvanbruggen/1-import GCE2017 schedule.cql

## 1-import GCE2017 schedule.cql
// Index every property that the import statements below use as a lookup key.
CREATE INDEX ON :Company(name);
CREATE INDEX ON :Floor(name);
CREATE INDEX ON :Person(name);
CREATE INDEX ON :Time(time);
CREATE INDEX ON :Room(name);
CREATE INDEX ON :Session(title);
CREATE INDEX ON :Tag(name);
CREATE INDEX ON :Track(name);

// neo4j-shell command (not Cypher): wait for the indexes before loading data.
schema await;

//add the speakers and companies
// A Person is identified by name only; title and bio are plain attributes that
// are filled in when the node is first created. Keeping them out of the MERGE
// key means a missing (null) title/bio no longer makes the MERGE fail, and a
// changed title/bio no longer produces a second Person with the same name.
// p and c stay in scope, so no second lookup is needed before linking them.
LOAD CSV WITH HEADERS FROM
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=1504480307" AS csv
MERGE (p:Person {name: csv.name})
  ON CREATE SET p.title = csv.title, p.bio = csv.bio
MERGE (c:Company {name: csv.company})
MERGE (p)-[:WORKS_FOR]->(c);

//add the rooms, tracks, floors
// Floor and Room are merged as individual nodes first and only then linked.
// Merging the whole (Room)-[:LOCATED_ON]->(Floor) pattern in one step would
// create a fresh Room node whenever that exact pattern is not already present,
// even if a Room with the same name exists.
LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS csv
MERGE (f:Floor {name: csv.floor})
MERGE (r:Room {name: csv.room})
MERGE (r)-[:LOCATED_ON]->(f)
MERGE (t:Track {name: csv.track});


//add the timeslots to each day
// Find or create one Time node for every start value and every end value in
// the schedule sheet; the value is converted to an integer before it is stored.
LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS row
MERGE (slotStart:Time {time: toInt(row.start)})
MERGE (slotEnd:Time {time: toInt(row.end)});

//Connecting the timeslots
// Sort all Time nodes by their time value and link every node to its successor.
// size() is the list-length function (length() is intended for paths), and
// UNWIND over the index range replaces the nested single-element FOREACH
// workaround. With fewer than two Time nodes the range is empty and nothing
// is linked.
MATCH (t:Time)
WITH t
ORDER BY t.time ASC
WITH collect(t) AS times
UNWIND range(0, size(times) - 2) AS i
WITH times[i] AS t1, times[i + 1] AS t2
MERGE (t1)-[:FOLLOWED_BY]->(t2);

//add the sessions and connect them up
// Every sheet row is resolved against existing Person, Track, Room and Time
// nodes; a row contributes nothing unless all five lookups succeed. Sessions
// are keyed by title, so several rows with the same title attach to one
// Session node. The tags value is stored unchanged on the session.
LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS row
MATCH (speaker:Person {name: row.speaker}),
      (track:Track {name: row.track}),
      (room:Room {name: row.room}),
      (startsAt:Time {time: toInt(row.start)}),
      (endsAt:Time {time: toInt(row.end)})
MERGE (session:Session {title: row.title})
SET session.abstract = row.abstract,
    session.tags = row.tags
MERGE (speaker)-[:SPEAKS_IN]->(session)
MERGE (session)-[:IN_ROOM]->(room)
MERGE (session)-[:STARTS_AT]->(startsAt)
MERGE (session)-[:ENDS_AT]->(endsAt)
MERGE (session)-[:IN_TRACK]->(track);

//extract the tags
// Split each session's comma-separated tags string into trimmed tag names and
// link the session to one Tag node per name. Blank entries (for example from a
// trailing or doubled comma) are skipped so that no Tag with an empty name is
// created. The raw tags property is removed from every session that yielded
// at least one tag.
MATCH (s:Session)
WITH s, [raw IN split(s.tags, ",") WHERE trim(raw) <> "" | trim(raw)] AS tags
UNWIND tags AS tag
MERGE (t:Tag {name: tag})
MERGE (s)-[:TAGGED_AS]->(t)
REMOVE s.tags;

## 2-query GCE2017 schedule graph.cql
//Querying the GraphConnect Europe 2017 Schedule

//query 1:
// Sessions that point at a Time node, together with every node directly
// connected to the session; capped at 50 rows. Terminated with ';' so the
// file can be run as a script.
MATCH (t:Time)<--(s:Session)--(connections)
RETURN t, s, connections
LIMIT 50;

//query 2: Look at two people
// All shortest paths, over any relationship type and direction, between the
// people whose names contain "Morgner" and "Webber". Terminated with ';' so
// the file can be run as a script.
MATCH (p1:Person), (p2:Person),
      path = allShortestPaths((p1)-[*]-(p2))
WHERE p1.name CONTAINS "Morgner"
  AND p2.name CONTAINS "Webber"
RETURN path;

//query 3: Look at a person and a company
// All shortest paths, over any relationship type and direction, between the
// company "GraphAware" and the person "Jim Webber". Terminated with ';' so the
// file can be run as a script.
MATCH (c:Company {name: "GraphAware"}), (p:Person {name: "Jim Webber"}),
      path = allShortestPaths((c)-[*]-(p))
RETURN path;

//query 4: Look at sessions with more than one speaker
// Group the speakers per session and keep only sessions with more than one.
// The relationship variable was unused and is dropped; the statement is
// terminated with ';' so the file can be run as a script.
MATCH (s:Session)-[:SPEAKS_IN]-(p:Person)
WITH s, collect(p) AS person, count(p) AS speakerCount
WHERE speakerCount > 1
RETURN s, person;

## 3-GCE2017 schedule graph graphgist.adoc

      
    Raw
  

              3-GCE2017 schedule graph graphgist.adoc
            
          
    The GraphConnect Europe 2017 Schedule Graph


Yey! It’s that time of the year again! We are full-steam getting ready for the Bi-Yearly Festival of Graphs also known as GraphConnect. There’s another great conference lined up for us. The entire Neo4j crew will be there in full force - and of course we had to create another Schedule graph - just for fun.


A Google Sheet as the main repository


Of course, I had to start from the schedule on the GraphConnect website and convert that into a Google Sheet with all the data. Once I had that, I could add the data pretty easily with this model:


Very simple - but it’s so much nicer when you can make it interactive and load it into Neo4j. Let’s do that. Let’s load that data into this graphgist.


//add the speakers and companies
// A Person is identified by name only; title and bio are plain attributes that
// are filled in when the node is first created. Keeping them out of the MERGE
// key means a missing (null) title/bio no longer makes the MERGE fail, and a
// changed title/bio no longer produces a second Person with the same name.
// p and c stay in scope, so no second lookup is needed before linking them.
LOAD CSV WITH HEADERS FROM
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=1504480307" AS csv
MERGE (p:Person {name: csv.name})
  ON CREATE SET p.title = csv.title, p.bio = csv.bio
MERGE (c:Company {name: csv.company})
MERGE (p)-[:WORKS_FOR]->(c);

//add the rooms, tracks, floors
// Floor and Room are merged as individual nodes first and only then linked.
// Merging the whole (Room)-[:LOCATED_ON]->(Floor) pattern in one step would
// create a fresh Room node whenever that exact pattern is not already present,
// even if a Room with the same name exists.
LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS csv
MERGE (f:Floor {name: csv.floor})
MERGE (r:Room {name: csv.room})
MERGE (r)-[:LOCATED_ON]->(f)
MERGE (t:Track {name: csv.track});


//add the timeslots to each day
// Find or create one Time node for every start value and every end value in
// the schedule sheet; the value is converted to an integer before it is stored.
LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS row
MERGE (slotStart:Time {time: toInt(row.start)})
MERGE (slotEnd:Time {time: toInt(row.end)});

//Connecting the timeslots
// Sort all Time nodes by their time value and link every node to its successor.
// size() is the list-length function (length() is intended for paths), and
// UNWIND over the index range replaces the nested single-element FOREACH
// workaround. With fewer than two Time nodes the range is empty and nothing
// is linked.
MATCH (t:Time)
WITH t
ORDER BY t.time ASC
WITH collect(t) AS times
UNWIND range(0, size(times) - 2) AS i
WITH times[i] AS t1, times[i + 1] AS t2
MERGE (t1)-[:FOLLOWED_BY]->(t2);

//add the sessions and connect them up
// Every sheet row is resolved against existing Person, Track, Room and Time
// nodes; a row contributes nothing unless all five lookups succeed. Sessions
// are keyed by title, so several rows with the same title attach to one
// Session node. The tags value is stored unchanged on the session.
LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS row
MATCH (speaker:Person {name: row.speaker}),
      (track:Track {name: row.track}),
      (room:Room {name: row.room}),
      (startsAt:Time {time: toInt(row.start)}),
      (endsAt:Time {time: toInt(row.end)})
MERGE (session:Session {title: row.title})
SET session.abstract = row.abstract,
    session.tags = row.tags
MERGE (speaker)-[:SPEAKS_IN]->(session)
MERGE (session)-[:IN_ROOM]->(room)
MERGE (session)-[:STARTS_AT]->(startsAt)
MERGE (session)-[:ENDS_AT]->(endsAt)
MERGE (session)-[:IN_TRACK]->(track);

//extract the tags
// Split each session's comma-separated tags string into trimmed tag names and
// link the session to one Tag node per name. Blank entries (for example from a
// trailing or doubled comma) are skipped so that no Tag with an empty name is
// created. The raw tags property is removed from every session that yielded
// at least one tag.
MATCH (s:Session)
WITH s, [raw IN split(s.tags, ",") WHERE trim(raw) <> "" | trim(raw)] AS tags
UNWIND tags AS tag
MERGE (t:Tag {name: tag})
MERGE (s)-[:TAGGED_AS]->(t)
REMOVE s.tags;


Let’s take a look at what we have now:


Ok - so that looks like a big fat hairball. Not very useful. So let’s try to zoom in a bit, and run a simple query over our graph: let’s find a couple of sessions in Day 1:


// Sessions that point at a Time node, plus every node directly connected to
// the session; capped at 50 rows.
MATCH (slot:Time)<--(session:Session)--(connections)
RETURN slot AS t, session AS s, connections
LIMIT 50


and here’s a sample of the graph:


Let’s do another query:


// All shortest paths, over any relationship type and direction, between the
// people whose names contain "Morgner" and "Webber".
MATCH path = allShortestPaths((p1:Person)-[*]-(p2:Person))
WHERE p1.name CONTAINS "Morgner"
  AND p2.name CONTAINS "Webber"
RETURN path


and display the result


Let’s now look at a link between a person (Jim Webber, of Neo fame) and an organisation (The Guardian).


// All shortest paths, over any relationship type and direction, between the
// company "The Guardian" and the person "Jim Webber".
MATCH (c:Company {name: "The Guardian"}), (p:Person {name: "Jim Webber"}),
      path = allShortestPaths((c)-[*]-(p))
RETURN path


and again display the result:


Last one for fun: let’s look at the sessions that have more than one speaker:


// Group the speakers per session and keep only sessions with more than one.
MATCH (s:Session)-[:SPEAKS_IN]-(speaker:Person)
WITH s, collect(speaker) AS person, count(speaker) AS speakerCount
WHERE speakerCount > 1
RETURN s, person


and display it:


Just a start…


There are so many other things that we could look at. Use the console below to explore if you are interested in more.


I hope this gist was interesting for you, and that we will see each other soon.


This gist was created by Rik Van Bruggen


My Blog


the Graphistania Neo4j Graph Database Podcast


My Book


On Twitter


On LinkedIn
	// Index every property that the import statements below use as a lookup key.
	CREATE INDEX ON :Company(name);
	CREATE INDEX ON :Floor(name);
	CREATE INDEX ON :Person(name);
	CREATE INDEX ON :Time(time);
	CREATE INDEX ON :Room(name);
	CREATE INDEX ON :Session(title);
	CREATE INDEX ON :Tag(name);
	CREATE INDEX ON :Track(name);

	// neo4j-shell command (not Cypher): wait for the indexes before loading data.
	schema await;

	//add the speakers and companies
	// A Person is identified by name only; title and bio are plain attributes that
	// are filled in when the node is first created. Keeping them out of the MERGE
	// key means a missing (null) title/bio no longer makes the MERGE fail, and a
	// changed title/bio no longer produces a second Person with the same name.
	// p and c stay in scope, so no second lookup is needed before linking them.
	LOAD CSV WITH HEADERS FROM
	"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=1504480307" AS csv
	MERGE (p:Person {name: csv.name})
	  ON CREATE SET p.title = csv.title, p.bio = csv.bio
	MERGE (c:Company {name: csv.company})
	MERGE (p)-[:WORKS_FOR]->(c);

	//add the rooms, tracks, floors
	// Floor and Room are merged as individual nodes first and only then linked.
	// Merging the whole (Room)-[:LOCATED_ON]->(Floor) pattern in one step would
	// create a fresh Room node whenever that exact pattern is not already present,
	// even if a Room with the same name exists.
	LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS csv
	MERGE (f:Floor {name: csv.floor})
	MERGE (r:Room {name: csv.room})
	MERGE (r)-[:LOCATED_ON]->(f)
	MERGE (t:Track {name: csv.track});


	//add the timeslots to each day
	// Find or create one Time node for every start value and every end value in
	// the schedule sheet; the value is converted to an integer before it is stored.
	LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS row
	MERGE (slotStart:Time {time: toInt(row.start)})
	MERGE (slotEnd:Time {time: toInt(row.end)});

	//Connecting the timeslots
	// Sort all Time nodes by their time value and link every node to its successor.
	// The previous text carried backslash-escaped pipes ("\|"), which are not valid
	// Cypher; this form needs no pipe at all. size() is the list-length function
	// (length() is intended for paths), and UNWIND over the index range replaces
	// the nested single-element FOREACH workaround. With fewer than two Time nodes
	// the range is empty and nothing is linked.
	MATCH (t:Time)
	WITH t
	ORDER BY t.time ASC
	WITH collect(t) AS times
	UNWIND range(0, size(times) - 2) AS i
	WITH times[i] AS t1, times[i + 1] AS t2
	MERGE (t1)-[:FOLLOWED_BY]->(t2);

	//add the sessions and connect them up
	// Every sheet row is resolved against existing Person, Track, Room and Time
	// nodes; a row contributes nothing unless all five lookups succeed. Sessions
	// are keyed by title, so several rows with the same title attach to one
	// Session node. The tags value is stored unchanged on the session.
	LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps/export?format=csv&id=1Hu4l5cfnn6efsAvjNq0DmUyW1K5EiyDd7jpo3Ei3Mps&gid=284108" AS row
	MATCH (speaker:Person {name: row.speaker}),
	      (track:Track {name: row.track}),
	      (room:Room {name: row.room}),
	      (startsAt:Time {time: toInt(row.start)}),
	      (endsAt:Time {time: toInt(row.end)})
	MERGE (session:Session {title: row.title})
	SET session.abstract = row.abstract,
	    session.tags = row.tags
	MERGE (speaker)-[:SPEAKS_IN]->(session)
	MERGE (session)-[:IN_ROOM]->(room)
	MERGE (session)-[:STARTS_AT]->(startsAt)
	MERGE (session)-[:ENDS_AT]->(endsAt)
	MERGE (session)-[:IN_TRACK]->(track);

	//extract the tags
	// Split each session's comma-separated tags string into trimmed tag names and
	// link the session to one Tag node per name. The list comprehension uses a
	// plain "|" separator (the backslash-escaped "\|" is not valid Cypher). Blank
	// entries (for example from a trailing or doubled comma) are skipped so that
	// no Tag with an empty name is created. The raw tags property is removed from
	// every session that yielded at least one tag.
	MATCH (s:Session)
	WITH s, [raw IN split(s.tags, ",") WHERE trim(raw) <> "" | trim(raw)] AS tags
	UNWIND tags AS tag
	MERGE (t:Tag {name: tag})
	MERGE (s)-[:TAGGED_AS]->(t)
	REMOVE s.tags;
	//Querying the GraphConnect Europe 2017 Schedule

	//query 1:
	// Sessions that point at a Time node, together with every node directly
	// connected to the session; capped at 50 rows. Terminated with ';' so it is
	// a separate statement when the text is run as a script.
	MATCH (t:Time)<--(s:Session)--(connections)
	RETURN t, s, connections
	LIMIT 50;

	//query 2: Look at two people
	// All shortest paths, over any relationship type and direction, between the
	// people whose names contain "Morgner" and "Webber". Terminated with ';' so
	// it is a separate statement when the text is run as a script.
	MATCH (p1:Person), (p2:Person),
	      path = allShortestPaths((p1)-[*]-(p2))
	WHERE p1.name CONTAINS "Morgner"
	  AND p2.name CONTAINS "Webber"
	RETURN path;

	//query 3: Look at a person and a company
	// All shortest paths, over any relationship type and direction, between the
	// company "GraphAware" and the person "Jim Webber". Terminated with ';' so it
	// is a separate statement when the text is run as a script.
	MATCH (c:Company {name: "GraphAware"}), (p:Person {name: "Jim Webber"}),
	      path = allShortestPaths((c)-[*]-(p))
	RETURN path;

	//query 4: Look at sessions with more than one speaker
	// Group the speakers per session and keep only sessions with more than one.
	// The relationship variable was unused and is dropped; the statement is
	// terminated with ';' so it stands alone when the text is run as a script.
	MATCH (s:Session)-[:SPEAKS_IN]-(p:Person)
	WITH s, collect(p) AS person, count(p) AS speakerCount
	WHERE speakerCount > 1
	RETURN s, person;