-
First create ics file: https://support.google.com/calendar/answer/37111?hl=en
-
Convert ics to based on csv https://openicsfile.com/csv-convert.html
-
Put that .csv file into the import directory of your Neo4j installation.
Local file possible as well for your personal calendar of course!
Importing the basic events into Neo4j.
load csv with headers from "file:/calendarfull.csv" as csv
// load csv with headers from "https://docs.google.com/spreadsheets/d/13nvxbfsQKet3bELgf04_C6SkHz-ABoAGAgnMw-YO3oc/export?format=csv&id=13nvxbfsQKet3bELgf04_C6SkHz-ABoAGAgnMw-YO3oc&gid=0" as csv
create (e:Event)
set e = csv
set e.starttime = datetime(e.starttime)
set e.endtime = datetime(e.endtime)
set e.createdtime = datetime(e.createdtime)
set e.lastmodifiedtime = datetime(e.lastmodifiedtime);
Indexes to speed up later operations:
create index on :Event(starttime);
create index on :Event(endtime);
create index on :Event(createdtime);
create index on :Event(lastmodifiedtime);
create index on :Event(summary);
create index on :Event(description);
create index on :Company(name);
create constraint on (p:Person)
assert p.email is unique;
create index on :ConferenceProvider(name);
Just keep the recent events, 2020 only:
MATCH (e:Event)
WHERE e.starttime.year < 2020 OR e.calendartype = "personal"
DELETE e;
Events that don’t have an organiser, will receive me as organiser:
match (e:Event)
where e.organizer is null
set e.organizer = "mailto:rik@neo4j.com";
Clean up the email addresses in the events:
match (e:Event)
set e.organizer = replace(e.organizer,"mailto:","")
set e.attendee = replace(e.attendee,"mailto:","")
set e.organizer = replace(e.organizer,"@neotechnology.com", "@neo4j.com")
set e.organizer = replace(e.organizer,"@neo4j.org", "@neo4j.com")
set e.attendee = replace(e.attendee,"@neotechnology.com", "@neo4j.com")
set e.attendee = replace(e.attendee,"@neo4j.org", "@neo4j.com");
Extracting the relationships from the Event data.
Create the people and link them to the events
match (e:Event)
merge (p:Person {email: e.organizer})
create (p)-[:ORGANIZES]->(e);
Remove the attendees from the events and connect them
match (e:Event)
with e, split(e.attendee,";") as CollectionOfAttendees
unwind CollectionOfAttendees as IndividualAttendee
merge (p:Person {email: IndividualAttendee})
merge (p)-[:ATTENDS]->(e);
Extract the companies from the Person nodes
match (p:Person)
WITH distinct split(p.email,"@")[1] as domain, p
WHERE not domain in ['email.com','163.com','free.fr','gmail.com','group.calendar.google.com','resource.calendar.google.com','qq.com','hotmail.com','mail.com','outlook.com','skynet.be','vanbruggen.be']
merge (c:Company {name: toUpper(domain)})
merge (p)-[:WORKS_FOR]->(c);
Rename the companies, by removing the TLDN:
match (c:Company)
set c.name = replace(c.name,".COM","")
set c.name = replace(c.name,".ORG","")
set c.name = replace(c.name,".BE","")
set c.name = replace(c.name,".NL","")
set c.name = replace(c.name,".FR","")
set c.name = replace(c.name,".CO.UK","")
set c.name = replace(c.name,".GOV.UK","")
set c.name = replace(c.name,".CO","")
set c.name = replace(c.name,".DK","")
set c.name = replace(c.name,".NO","")
set c.name = replace(c.name,".IO","");
Because of previous step, we may have multiple accounts with same name (from different TLDNs). So we need to merge companies with same name ("DELOITTE" as example) with APOC procedure.
match (c1:Company), (c2:Company)
where id(c1) < id(c2)
and c1.name = c2.name
with collect([c1,c2]) as companies
unwind companies as company
call apoc.refactor.mergeNodes(company,{properties:"discard",mergeRels:true}) yield node
return node;
Take a look at the conference providers mentioned in the events:
match (e:Event)
where e.description contains "meet.google.com"
merge (cp:ConferenceProvider {name:"Google Meet"})
merge (e)-[:USES_CP]->(cp);
match (e:Event)
where e.description contains "neo4j.zoom.us"
merge (cp:ConferenceProvider {name:"Zoom"})
merge (e)-[:USES_CP]->(cp);
match (e:Event)
where e.description contains "teams.microsoft.com"
merge (cp:ConferenceProvider {name:"Microsoft Teams"})
merge (e)-[:USES_CP]->(cp);
match (e:Event)
where toUpper(e.description) contains "GOTOMEETING"
merge (cp:ConferenceProvider {name:"LogMeIn GoToMeeting"})
merge (e)-[:USES_CP]->(cp);
match (e:Event)
where toUpper(e.description) contains "WEBEX"
merge (cp:ConferenceProvider {name:"Cisco WebEx"})
merge (e)-[:USES_CP]->(cp);
Meetings per conference provider:
match (e:Event)--(cp:ConferenceProvider)
return cp.name, count(cp);
Meetings without conference providers:
match (e:Event)
where not ((e)--(:ConferenceProvider))
return count(e);
Nr of Confcalls per month:
match (e:Event)--(cp:ConferenceProvider)
return distinct e.starttime.month as month, count(cp)
order by month asc;
Or per week:
match (e:Event)--(cp:ConferenceProvider)
return distinct e.starttime.week as week, count(cp)
order by week asc;
Days with most meetings:
match (e:Event) return distinct date(e.starttime), count(e) order by
count(e) desc limit 10;
Using apoc.iterate as there could be a lot of links:
call apoc.periodic.iterate(
"match (p1:Person)-[:ATTENDS]->(e:Event)<-[:ATTENDS]-(p2:Person)
where id(p1)<id(p2)
return p1,p2,e",
"merge (p1)-[mwr:MEETS_WITH]->(p2)
on create set mwr.frequency = 1
on match set mwr.frequency = mwr.frequency + 1",
{batchsize:1000, parallel:false});
Then we can easily find the most frequent meeters:
match (p1:Person)-[mw:MEETS_WITH]->(p2:Person) return p1.email,
mw.frequency, p2.email order by mw.frequency desc limit 10;