Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rvanbruggen/533ef8c16f005b1f2a3d1c61627f680e to your computer and use it in GitHub Desktop.
Save rvanbruggen/533ef8c16f005b1f2a3d1c61627f680e to your computer and use it in GitHub Desktop.
colruyt datascience homework assignment.cql
//colruyt datascience homework assignment in Neo4j
// from: https://github.com/MarkiesFredje/data-engineering-exercise/blob/master/data_engineer_exercise.ipynb
// json file download location: https://ecgplacesmw.colruytgroup.com/ecgplacesmw/v3/nl/places/filter/clp-places
//import into neo4j using apoc
//create indexes and constraint
create index on :Address(streetName);
create index on :City(name);
create index on :Country(name);
create index on :Branch(commercialName);
create index on :Branch(geoCoordinates);
create constraint on (b:Branch) assert b.id is unique;
//import addresses, placetypes, branches, and connect the branches to addresses and placetypes
WITH "https://ecgplacesmw.colruytgroup.com/ecgplacesmw/v3/nl/places/filter/clp-places" AS url
call apoc.load.json(url) yield value
merge ( a:Address { houseNumber: value.address.houseNumber, streetName: value.address.streetName, countryName: value.address.countryName, cityName: value.address.cityName, postalcode: value.address.postalcode, geoCoordinates: point({latitude: toFloat(value.geoCoordinates.latitude), longitude: toFloat(value.geoCoordinates.longitude) } ) } )
merge (pt:PlaceType {id: value.placeType.id, description: value.placeType.placeTypeDescription, name: value.placeType.longName})
merge (b:Branch
{id: value.branchId})
set b.placeId=value.placeId
set b.isActive=value.isActive
set b.sourceStatus=value.sourceStatus
set b.routeUrl=value.routeUrl
set b.moreInfoUrl=value.moreInfoUrl
set b.commercialName=value.commercialName
set b.handoverServices=value.handoverServices
set b.sellingPartners=value.sellingPartners
merge (b)-[:IN_STREET]->(a)
merge (b)-[:HAS_PLACETYPE]->(pt)
return b,a,pt;
//extract city from addresses
match (a:Address)
merge (c:City {name: a.cityName, postalcode: a.postalcode})
merge (a)-[:IN_CITY]->(c);
//extract countries from addresses
match (a:Address)--(c:City)
merge (co:Country {name: a.countryName})
merge (c)-[:IN_COUNTRY]->(co);
//duplicate geocoordinates: stored both as geospatial data types, and as lat/long numbers
match (a:Address)
set a.longitude = a.geoCoordinates.longitude
set a.latitude = a.geoCoordinates.latitude;
//find branches close to home
with point({latitude: 51.2081948, longitude: 4.4049826}) as home
match path = (:Country)--(:City)--(a:Address)--(:Branch)--(:PlaceType)
where distance(a.geoCoordinates, home) < 2000
return path
//find all addresses/branches that are within 2000m from one another
match (a1:Address)--(b1:Branch), (a2:Address)--(b2:Branch)
where id(a1)<id(a2)
and distance(a1.geoCoordinates, a2.geoCoordinates) < 2000
return b1.commercialName, b2.commercialName, distance(a1.geoCoordinates, a2.geoCoordinates)/1000+" km"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment