Skip to content

Instantly share code, notes, and snippets.

@dacr
Last active June 21, 2024 21:09
Show Gist options
  • Save dacr/0943af77e448644b5cf6a4917b04df26 to your computer and use it in GitHub Desktop.
Save dacr/0943af77e448644b5cf6a4917b04df26 to your computer and use it in GitHub Desktop.
postal code opendata data sources. / published by https://github.com/dacr/code-examples-manager #095ffa72-b4d9-4f3d-85b2-b3e69a302ac4/f90b5950c7d494c673b482f71551350c45248753
// summary : postal code opendata data sources.
// keywords : scala, opendata, data-analysis, requests, postal-codes, @testable
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2)
// id : 095ffa72-b4d9-4f3d-85b2-b3e69a302ac4
// created-on : 2020-10-10T16:21:18Z
// managed-by : https://github.com/dacr/code-examples-manager
// run-with : scala-cli $file
// ---------------------
//> using scala 3.4.2
//> using dep com.lihaoyi::requests:0.8.3
//> using dep com.lihaoyi::os-lib:0.10.2
// ---------------------
// Presumably the data set landing page on data.gouv.fr (not referenced by the code visible in this script).
val openDataPostalCodesHome = "https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/"
// Alternative data.gouv.fr resource URL, currently disabled in favour of the La Poste one below.
//val openDataPostalCodesDataSourceURI = "https://www.data.gouv.fr/fr/datasets/r/3062548d-f510-4ded-ba38-a64126a5331b"
// CSV resource that gets downloaded when no local cached copy exists.
val openDataPostalCodesDataSourceURI = "https://datanova.laposte.fr/data-fair/api/v1/datasets/laposte-hexasmal/metadata-attachments/base-officielle-codes-postaux.csv"
// Local copy of the downloaded CSV, stored in the current working directory and reused by later runs.
val cachedResponseFile = os.pwd / "base-officielle-codes-postaux.csv"
// ---------------------------------------------------------------------------------------------------------------------
/** A geographic position.
  *
  * @param latitude  latitude of the position (presumably decimal degrees — TODO confirm against the data source)
  * @param longitude longitude of the position (presumably decimal degrees — TODO confirm against the data source)
  */
case class Point(latitude: Double, longitude: Double)
/** One entry of the postal codes data set.
  *
  * @param townCode          town identifier; its leading characters give the [[countyCode]]
  * @param townName          name of the town
  * @param postalCode        postal code attached to the town
  * @param secondaryTownName optional secondary town name
  * @param deliveryLabel     optional delivery label
  * @param gps               optional GPS position of the town
  */
case class PostalCode(
  townCode: String,
  townName: String,
  postalCode: String,
  secondaryTownName: Option[String],
  deliveryLabel: Option[String],
  gps: Option[Point]
) {

  /** County code derived from [[townCode]].
    *
    * Overseas territories are identified by three characters, every other code by two.
    * Both the "97" and the "98" prefixes are overseas ones (e.g. 971, 974, 986, 987, 988);
    * keeping only two characters for "98" would wrongly merge distinct territories together.
    */
  val countyCode: String = {
    val isOverseas = townCode.startsWith("97") || townCode.startsWith("98")
    townCode.take(if (isOverseas) 3 else 2)
  }
}
/** Parses a "latitude,longitude" text into a [[Point]].
  *
  * @param input two comma separated decimal numbers; blanks around each number are ignored
  * @return the parsed position, or `None` when the input does not hold exactly two parts
  *         or when one of them is not a valid number
  */
def stringToGPS(input: String): Option[Point] = {
  val coordinates = input.split(",").map(_.trim)
  if (coordinates.length != 2) None
  else
    coordinates(0).toDoubleOption.flatMap { lat =>
      coordinates(1).toDoubleOption.map(lon => Point(lat, lon))
    }
}
/** Parses one CSV line into a [[PostalCode]].
  *
  * The line is first normalized so that both the quoted comma separated layout and the
  * semicolon separated layout end up semicolon separated, then it is split on `;`.
  * Lines made of 4, 5 or 6 fields are accepted (trailing empty fields are dropped by `split`),
  * anything else is reported on the standard output and ignored.
  *
  * @param input a raw line coming from the CSV data source
  * @return the parsed entry, or `None` when the line layout is not supported
  */
def stringToPostalCode(input: String): Option[PostalCode] = {
  // Blank texts are considered as missing values.
  def nonBlank(text: String): Option[String] = if (text.trim.isEmpty) None else Some(text)

  // Single construction point shared by all the supported line layouts.
  def build(
    townCode: String,
    townName: String,
    postalCode: String,
    secondaryTownName: String,
    deliveryLabel: Option[String],
    position: Option[String]
  ): Option[PostalCode] =
    Some(
      PostalCode(
        townCode,
        townName,
        postalCode,
        nonBlank(secondaryTownName),
        deliveryLabel.flatMap(nonBlank),
        position.flatMap(stringToGPS)
      )
    )

  // Basic hack parsing to support both formats, the opendata one and the laposte one.
  val normalized = input.trim
    .replaceAll("\",\"", ";")
    .replaceAll("^\"(.*)\"$", "$1")
    .replaceAll("\",,\"", ";;")
    .replaceAll("\",,", ";;")

  normalized.split(";") match {
    case Array(townCode, townName, postalCode, secondary, label, position) =>
      build(townCode, townName, postalCode, secondary, Some(label), Some(position))
    case Array(townCode, townName, postalCode, secondary, label) =>
      build(townCode, townName, postalCode, secondary, Some(label), None)
    case Array(townCode, townName, postalCode, secondary) =>
      build(townCode, townName, postalCode, secondary, None, None)
    case unmanaged =>
      println("Unmanaged input : " + unmanaged.mkString(";"))
      None
  }
}
// All the postal code entries which could be parsed from the CSV data source.
// The CSV content is read from the local cache file when it exists,
// otherwise it is downloaded first and saved for the next runs.
val postalCodes = {
  val csvLines =
    if (!os.exists(cachedResponseFile)) {
      val downloaded = requests.get(openDataPostalCodesDataSourceURI)
      os.write(cachedResponseFile, downloaded) // keep a local copy to avoid downloading again
      downloaded.lines()
    } else {
      os.read(cachedResponseFile).split("\n").toVector
    }
  val dataLines = csvLines.drop(1) // the first line only holds the CSV labels
  dataLines.flatMap(line => stringToPostalCode(line))
}
// Town names grouped by county code.
// The data source may list the same town on several lines (several postal codes or secondary
// names), so entries are first de-duplicated on their town code: each town is counted once.
val townByCounty = postalCodes.toList
  .distinctBy(_.townCode)
  .groupMap(_.countyCode)(_.townName)

// Entries whose town name holds the highest / lowest number of letters (None when there is no data).
val longestTownName = postalCodes.maxByOption(_.townName.count(_.isLetter))
val shortestTownName = postalCodes.minByOption(_.townName.count(_.isLetter))

// County code with the highest number of distinct towns, together with that number.
val countyWithMostTowns = townByCounty
  .maxByOption { case (_, towns) => towns.size }
  .map { case (countyCode, towns) => countyCode -> towns.size }

// Dump every parsed entry, ordered by town code, then print the summary.
postalCodes
  .sortBy(_.townCode)
  .foreach(println)

println(
  s"""postalCodeCount : ${postalCodes.size}
     |longestTownName : ${longestTownName}
     |shortestTownName : ${shortestTownName}
     |countyWithMostTowns : ${countyWithMostTowns}
     |""".stripMargin
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment