Created
November 7, 2019 09:37
-
-
Save rvanbruggen/533ef8c16f005b1f2a3d1c61627f680e to your computer and use it in GitHub Desktop.
colruyt datascience homework assignment.cql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//colruyt datascience homework assignment in Neo4j | |
// from: https://github.com/MarkiesFredje/data-engineering-exercise/blob/master/data_engineer_exercise.ipynb | |
// json file download location: https://ecgplacesmw.colruytgroup.com/ecgplacesmw/v3/nl/places/filter/clp-places | |
//import into neo4j using apoc | |
//create indexes and constraint | |
create index on :Address(streetName); | |
create index on :City(name); | |
create index on :Country(name); | |
create index on :Branch(commercialName); | |
create index on :Branch(geoCoordinates); | |
create constraint on (b:Branch) assert b.id is unique; | |
//import addresses, placetypes, branches, and connect the branches to addresses and placetypes | |
WITH "https://ecgplacesmw.colruytgroup.com/ecgplacesmw/v3/nl/places/filter/clp-places" AS url | |
call apoc.load.json(url) yield value | |
merge ( a:Address { houseNumber: value.address.houseNumber, streetName: value.address.streetName, countryName: value.address.countryName, cityName: value.address.cityName, postalcode: value.address.postalcode, geoCoordinates: point({latitude: toFloat(value.geoCoordinates.latitude), longitude: toFloat(value.geoCoordinates.longitude) } ) } ) | |
merge (pt:PlaceType {id: value.placeType.id, description: value.placeType.placeTypeDescription, name: value.placeType.longName}) | |
merge (b:Branch | |
{id: value.branchId}) | |
set b.placeId=value.placeId | |
set b.isActive=value.isActive | |
set b.sourceStatus=value.sourceStatus | |
set b.routeUrl=value.routeUrl | |
set b.moreInfoUrl=value.moreInfoUrl | |
set b.commercialName=value.commercialName | |
set b.handoverServices=value.handoverServices | |
set b.sellingPartners=value.sellingPartners | |
merge (b)-[:IN_STREET]->(a) | |
merge (b)-[:HAS_PLACETYPE]->(pt) | |
return b,a,pt; | |
//extract city from addresses | |
match (a:Address) | |
merge (c:City {name: a.cityName, postalcode: a.postalcode}) | |
merge (a)-[:IN_CITY]->(c); | |
//extract countries from addresses | |
match (a:Address)--(c:City) | |
merge (co:Country {name: a.countryName}) | |
merge (c)-[:IN_COUNTRY]->(co); | |
//duplicate geocoordinates: stored both as geospatial data types, and as lat/long numbers | |
match (a:Address) | |
set a.longitude = a.geoCoordinates.longitude | |
set a.latitude = a.geoCoordinates.latitude; | |
//find branches close to home | |
with point({latitude: 51.2081948, longitude: 4.4049826}) as home | |
match path = (:Country)--(:City)--(a:Address)--(:Branch)--(:PlaceType) | |
where distance(a.geoCoordinates, home) < 2000 | |
return path | |
//find all addresses/branches that are within 2000m from one another | |
match (a1:Address)--(b1:Branch), (a2:Address)--(b2:Branch) | |
where id(a1)<id(a2) | |
and distance(a1.geoCoordinates, a2.geoCoordinates) < 2000 | |
return b1.commercialName, b2.commercialName, distance(a1.geoCoordinates, a2.geoCoordinates)/1000+" km" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment