Skip to content

Instantly share code, notes, and snippets.

@dacr
Last active June 28, 2024 23:14
Show Gist options
  • Save dacr/ed1264861cb0f0a82afd1bd45c272a92 to your computer and use it in GitHub Desktop.
Save dacr/ed1264861cb0f0a82afd1bd45c272a92 to your computer and use it in GitHub Desktop.
check neo4j spatial features / published by https://github.com/dacr/code-examples-manager #6a213101-b3ef-4a3e-abbe-cb5c0bcde878/df16705c9ceff483869c2bb2a4f29c88a7866fff
// summary : check neo4j spatial features
// keywords : scala, neo4j, cypher, spatial, @testable
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2)
// id : 6a213101-b3ef-4a3e-abbe-cb5c0bcde878
// created-on : 2024-06-20T14:18:48+02:00
// managed-by : https://github.com/dacr/code-examples-manager
// run-with : scala-cli $file
// ---------------------
//> using scala 3.4.2
//> using javaOpt -Xmx5g
//> using dep org.neo4j.test:neo4j-harness:5.20.0
//> using dep org.neo4j.driver:neo4j-java-driver:5.21.0
//> using dep org.wvlet.airframe::airframe-ulid:24.6.1
//> using dep com.lihaoyi::requests:0.8.3
//> using dep com.lihaoyi::os-lib:0.10.2
// ---------------------
// =====================================================================================
// see opendata-postalcodes.sc gist
//val openDataPostalCodesHome = "https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/"
//val openDataPostalCodesDataSourceURI = "https://www.data.gouv.fr/fr/datasets/r/3062548d-f510-4ded-ba38-a64126a5331b"
val postalCodeDataSourceURL = "https://datanova.laposte.fr/data-fair/api/v1/datasets/laposte-hexasmal/metadata-attachments/base-officielle-codes-postaux.csv"
val postalCodeFileCache = os.pwd / "base-officielle-codes-postaux.csv"
val departmentDataSourceURL = "https://www.data.gouv.fr/fr/datasets/r/70cef74f-70b1-495a-8500-c089229c0254"
val departmentFileCache = os.pwd / "departements-france.csv"
// ---------------------------------------------------------------------------------------------------------------------
case class Point(
latitude: Double,
longitude: Double
)
case class Country(
name: String
) {
val id = name.toLowerCase.replaceAll("[^a-z]", "")
}
case class Region(
code: String,
name: String,
countryId: String
) {
val id = s"reg$code"
}
case class Department(
code: String,
name: String,
region: Region
) {
val id = s"dep$code"
}
case class Places(
townCode: String,
townName: String,
postalCode: String,
secondaryTownName: Option[String],
deliveryLabel: Option[String],
gps: Option[Point]
) {
val departmentCode = townCode.take(if (townCode.startsWith("97") || townCode.startsWith("98") || townCode.startsWith("99")) 3 else 2)
val departmentId = s"dep$departmentCode"
val code = postalCode
val name = (List(townName) :++ secondaryTownName :++ deliveryLabel).distinct.mkString(" ")
val nameSlug = name.replaceAll("[^A-Z]", "_")
val id = s"PLACE_${nameSlug}_$code" // a unique town identifier
}
def stringToGPS(input: String): Option[Point] = {
input.split(",").map(_.trim) match {
case Array(latitude, longitude) =>
for {
lat <- latitude.toDoubleOption
lon <- longitude.toDoubleOption
} yield Point(lat, lon)
case _ => None
}
}
def stringToPostalCode(input: String): Places = {
input.trim // with some basic hack parsing to support both format, opendata and laposte ones
.replaceAll("\",\"", ";")
.replaceAll("^\"(.*)\"$", "$1")
.replaceAll("\",,\"", ";;")
.replaceAll("\",,", ";;")
.replaceAll("\"", "")
.split(";") match {
case Array(townCode, townName, postalCode, "", deliveryLabel, position) =>
Places(townCode, townName, postalCode, None, Some(deliveryLabel).filter(_.trim.nonEmpty), stringToGPS(position))
case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel, position) =>
Places(townCode, townName, postalCode, Some(secondaryTownName).filter(_.trim.nonEmpty), Some(deliveryLabel).filter(_.trim.nonEmpty), stringToGPS(position))
case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel) =>
Places(townCode, townName, postalCode, Some(secondaryTownName).filter(_.trim.nonEmpty), Some(deliveryLabel).filter(_.trim.nonEmpty), None)
case Array(townCode, townName, postalCode, secondaryTownName) =>
Places(townCode, townName, postalCode, Some(secondaryTownName).filter(_.trim.nonEmpty), None, None)
}
}
def stringToDepartment(input: String): Department = {
input.trim
.split(",") match {
case Array(code, name, regionCode, regionName) =>
Department(code, name, Region(regionCode, regionName, countryId = "france"))
}
}
def getData(dataSourceURL: String, dataSourceCacheFile: os.Path): Vector[String] = {
if (os.exists(dataSourceCacheFile)) os.read(dataSourceCacheFile).split("\n").toVector
else {
val data = requests.get(dataSourceURL)
os.write(dataSourceCacheFile, data)
data.lines()
}
}
val postalCodes =
getData(postalCodeDataSourceURL, postalCodeFileCache)
.drop(1) // first line == the CSV labels
.map(stringToPostalCode)
val departments =
getData(departmentDataSourceURL, departmentFileCache)
.drop(1) // first line == the CSV labels
.map(stringToDepartment)
val regions =
departments
.map(_.region)
.distinct
val countries =
Vector(Country("France"))
// =====================================================================================
import org.neo4j.driver.{AuthTokens, GraphDatabase, Session}
import org.neo4j.harness.Neo4jBuilders
import wvlet.airframe.ulid.ULID
import scala.util.Using
import scala.util.Random.*
import scala.util.chaining.*
import scala.jdk.CollectionConverters.*
// create a file with all cypher statements for regions, departments, cities
def dataImportUsingFixture(): String = {
val fixtureCountries =
countries.map(c => s"""CREATE (${c.id}:Country {id:"${c.id}", name:"${c.name}"})""".stripMargin)
val fixtureRegions =
regions
.map(r => s"""CREATE (${r.id}:Region {id:"${r.id}", name: "${r.name}", code: "${r.code}"})
|CREATE (${r.id}:Region)-[:InCountry]->(${r.countryId}:Country)""".stripMargin)
val fixtureDepartments =
departments
.map(d => s"""CREATE (${d.id}:Department {id:"${d.id}", name: "${d.name}", code: "${d.code}"})
|CREATE (${d.id}:Department)-[:InRegion]->(${d.region.id}:Region)""".stripMargin)
val fixtureCities =
postalCodes
.filter(_.gps.isDefined)
.map(pc => s"""CREATE (${pc.id}:Place {id:"${pc.id}", name:"${pc.name}", point:point({latitude:${pc.gps.get.latitude}, longitude: ${pc.gps.get.longitude}}), code:"${pc.code}"})
|CREATE (${pc.id}:Place)-[:InDepartment]->(${pc.departmentId}:Department)""".stripMargin)
val fixtures = (fixtureRegions ++ fixtureDepartments ++ fixtureCities).mkString("\n")
os.write.over(os.pwd / "neo4j-spatial-features.fixtures", fixtures)
fixtures
}
def createIndexes(session: Session): Unit = {
session.run("CREATE INDEX regions_index IF NOT EXISTS FOR (r:Region) ON (r.id)")
session.run("CREATE INDEX departments_index IF NOT EXISTS FOR (d:Department) ON (d.id)")
session.run("CREATE INDEX places_index IF NOT EXISTS FOR (c:Place) ON (c.id)")
session.run("CREATE POINT INDEX places_index_point IF NOT EXISTS FOR (c:Place) ON (c.point)")
}
def removeIndexes(session: Session): Unit = {
session.run("DROP INDEX regions_index IF EXISTS")
session.run("DROP INDEX departments_index IF EXISTS")
session.run("DROP INDEX places_index IF EXISTS")
session.run("DROP INDEX places_index_point IF EXISTS")
}
def dataImportUsingCode(session: Session): Unit = {
val countryQuery = """CREATE (:Country {id:$id, name:$name})"""
for (c <- countries) {
println("Importing $c")
session.run(
countryQuery,
Map("id" -> c.id, "name" -> c.name).asJava
)
}
val regionCreateQuery = """MATCH (to:Country {id:$countryId})
|CREATE (:Region {id:$id, name: $name, code: $code})-[:InCountry]->(to)""".stripMargin
for (r <- regions) {
println(s"Importing $r")
session.run(
regionCreateQuery,
Map("id" -> r.id, "name" -> r.name, "code" -> r.code, "countryId" -> r.countryId).asJava
)
}
val departmentCreateQuery = """MATCH (to:Region {id:$regionId})
|CREATE (:Department {id:$id, name: $name, code: $code})-[:InRegion]->(to)""".stripMargin
for (d <- departments) {
println(s"Importing $d")
session.run(
departmentCreateQuery,
Map("id" -> d.id, "name" -> d.name, "code" -> d.code, "regionId" -> d.region.id).asJava
)
}
val placeCreateQuery = """MATCH (to:Department {id:$depId})
|CREATE (:Place {id:$id, name:$name, point:point({latitude:$lat, longitude: $lon}), code:$code})-[:InDepartment]->(to)""".stripMargin
for (c <- postalCodes.filter(_.gps.nonEmpty)) {
println(s"Importing $c")
session.run(
placeCreateQuery,
Map("id" -> c.id, "name" -> c.name, "code" -> c.code, "lat" -> c.gps.get.latitude, "lon" -> c.gps.get.longitude, "depId" -> c.departmentId).asJava
)
}
}
def dataAnalysisPlacesCount(session: Session): Unit = {
val query = """MATCH (n:Place) RETURN count(n) AS n"""
val response = session.run(query)
val placesCount = response.single().get("n").asInt()
println("----------------------------------------------------------")
println(s"places count $placesCount")
}
def dataAnalysisParisBrestDistance(session: Session): Unit = {
val query =
"""
|WITH
| point({longitude: 2.333333, latitude: 48.866667}) AS paris,
| point({longitude: -4.48333, latitude: 48.400002}) AS brest
|RETURN point.distance(paris, brest) as dist
|""".stripMargin
val dist = session.run(query).single().get("dist").asDouble()
println("----------------------------------------------------------")
println(s"distance paris-brest $dist meters")
}
def dataAnalysisPlacesNearFranceCenter(session: Session): Unit = {
val query =
"""
|MATCH (n:Place)
|WITH n.point AS point, point({longitude: 2.576461, latitude: 46.513493}) AS center
|WHERE point.distance(center, point) < 10000
|RETURN count(point) AS count
|""".stripMargin
val count = session.run(query).single().get("count").asDouble()
println("----------------------------------------------------------")
println(s"They are $count places 10km around the center of France")
}
def dataAnalysisPlacesByRegion(session: Session): Unit = {
val query =
"""
|MATCH (r:Region)
|WITH r
|MATCH (r)<--(d:Department)<--(c:Place)
|RETURN r.name AS regionName, count(c) AS count
|ORDER BY count DESC
|""".stripMargin
val response = session.run(query)
val results = response.list().asScala
println("----------------------------------------------------------")
results.foreach { result =>
val regionName = result.get("regionName").asString()
val placesCount = result.get("count").asInt()
println(s"$placesCount places in region $regionName")
}
}
def dataAnalysisPlacesWithinBoundingBox(session: Session): Unit = {
val query =
"""
|MATCH (n:Place)
|WITH
| n,
| point({longitude: 8.613267, latitude: 41.316747}) AS lowerLeft,
| point({longitude: 9.624277, latitude: 43.061031}) AS upperRight
|WHERE point.withinBBox(n.point, lowerLeft, upperRight)
|RETURN count(n) AS count
|""".stripMargin
val count = session.run(query).single().get("count").asDouble()
println("----------------------------------------------------------")
println(s"They are $count places in corsica")
}
def dataAnalysisDensityByZone(session: Session): Unit = {
val step = 0.2d
val istep = step - 0.0000001d
val longitudes = LazyList.iterate(-4.8d)(_ + step).takeWhile(_ < 8.3d)
val latitudes = LazyList.iterate(42.2d)(_ + step).takeWhile(_ < 51.2d)
val params = Map(
"step" -> istep,
"latitudes" -> latitudes.toArray,
"longitudes" -> longitudes.toArray
)
val query =
"""UNWIND $latitudes as lat
|UNWIND $longitudes as lon
|MATCH (p:Place)
|WHERE point.withinBBox(
| p.point,
| point({longitude: lon, latitude: lat}),
| point({longitude: (lon + $step), latitude: (lat + $step)})
|)
|RETURN lat, lon, count(p) AS count
|ORDER BY -count
|LIMIT 5
|""".stripMargin
val countByZone =
session
.run(query, params.asJava)
.list()
.asScala
.map(r => ((r.get("lat").asDouble(), r.get("lon").asDouble()), r.get("count").asInt()))
.toList
println("----------------------------------------------------------")
println(s"Zone with highest places density (TOP 5)")
countByZone.foreach { case ((lat, lon), count) =>
println(f" $count%d for bbox ($lat%.3f,$lon%.3f)->(${lat + step}%.3f,${lon + step}%.3f)")
}
}
def dataAnalysis(session: Session): Unit = {
val started = System.currentTimeMillis()
dataAnalysisPlacesCount(session)
dataAnalysisParisBrestDistance(session)
dataAnalysisPlacesNearFranceCenter(session)
dataAnalysisPlacesByRegion(session)
dataAnalysisPlacesWithinBoundingBox(session)
dataAnalysisDensityByZone(session)
val duration = System.currentTimeMillis() - started
println(s"everything executed in ${duration}ms")
}
// =====================================================================================
def withEmbeddedDatabaseAndFixture(): Unit = {
val builder =
Neo4jBuilders
.newInProcessBuilder()
.withFixture(dataImportUsingFixture()) // ~67minutes
Using(builder.build()) { embedded =>
Using(GraphDatabase.driver(embedded.boltURI(), AuthTokens.none())) { driver =>
Using(driver.session()) { session =>
createIndexes(session)
dataAnalysis(session)
}.tap(r => println(r))
}.tap(r => println(r))
}.tap(r => println(r))
}
// =====================================================================================
def withEmbeddedDatabase(): Unit = {
val builder =
Neo4jBuilders
.newInProcessBuilder()
Using(builder.build()) { embedded =>
Using(GraphDatabase.driver(embedded.boltURI(), AuthTokens.none())) { driver =>
Using(driver.session()) { session =>
createIndexes(session)
dataImportUsingCode(session) // ~48seconds (~54seconds with indexes on ids)
dataAnalysis(session)
}.tap(r => println(r))
}.tap(r => println(r))
}.tap(r => println(r))
}
// =====================================================================================
def forConsoleUsagesWithExistingNEO4J(): Unit = {
// for live test, read to copy/paste instructions
val session =
GraphDatabase
.driver("neo4j://127.0.0.1:7687", AuthTokens.basic("neo4j", "neo4j"))
.session()
}
def forConsoleUsages(): Unit = {
// for live test, read to copy/paste instructions
val builder = Neo4jBuilders.newInProcessBuilder()
val embedded = builder.build()
val driver = GraphDatabase.driver(embedded.boltURI(), AuthTokens.none())
val session = driver.session()
}
def moreExamplesReadyToCopyPaste(session: Session): Unit = {
// ---------------------------------
// an example query, distance between two points WGS84 3D
val dist =
session
.run(
"""RETURN
|point.distance(
| point({longitude: 2.333333, latitude: 48.866667, height: 30.0}),
| point({longitude: 2.333333, latitude: 48.866667, height: 32.0})
|) AS dist
|""".stripMargin
)
.single()
.get("dist")
.asDouble()
// ---------------------------------
// loop like behavior => in fact cartesian product between row and col
val coords =
session
.run(
"""UNWIND [1,2,3,4,5] as row
|UNWIND [1,2,3,4,5] as col
|RETURN row AS y, col AS x
|""".stripMargin
)
.list()
.asScala
.map(r => (r.get("x").asInt(), r.get("y").asInt()))
// ---------------------------------
// 3D Bounding box - WGS84
val record =
session
.run(
"""RETURN
| point.withinBBox(
| point({longitude: 2.222222, latitude: 48.333333, height: 25.0}),
| point({longitude: 2.111111, latitude: 48.111111, height: 20.0}),
| point({longitude: 2.333333, latitude: 48.866667, height: 32.0})
| ) AS isIn1,
| point.withinBBox(
| point({longitude: 2.222222, latitude: 48.333333, height: 19.0}),
| point({longitude: 2.111111, latitude: 48.111111, height: 20.0}),
| point({longitude: 2.333333, latitude: 48.866667, height: 32.0})
| ) AS isIn2
|""".stripMargin
)
.single()
val isIn1 = record.get("isIn1").asBoolean()
val isIn2 = record.get("isIn2").asBoolean()
// ---------------------------------
}
// =====================================================================================
//withEmbeddedDatabaseAndFixture()
withEmbeddedDatabase()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment