// Last active June 21, 2024 21:09
// postal code opendata data sources. / published by #095ffa72-b4d9-4f3d-85b2-b3e69a302ac4/f90b5950c7d494c673b482f71551350c45248753
// summary : postal code opendata data sources.
// keywords : scala, opendata, data-analysis, requests, postal-codes, @testable
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (
// id : 095ffa72-b4d9-4f3d-85b2-b3e69a302ac4
// created-on : 2020-10-10T16:21:18Z
// managed-by :
// run-with : scala-cli $file
// ---------------------
//> using scala 3.4.2
//> using dep com.lihaoyi::requests:0.8.3
//> using dep com.lihaoyi::os-lib:0.10.2
// ---------------------
// NOTE(review): empty literal — presumably the dataset home page URL belongs here; confirm.
val openDataPostalCodesHome: String = ""

//val openDataPostalCodesDataSourceURI = ""
// URI handed to requests.get when no cached copy exists.
// NOTE(review): empty literal — a download cannot succeed until a real URI is supplied.
val openDataPostalCodesDataSourceURI: String = ""

// Cached copy of the CSV, located in the current working directory.
val cachedResponseFile: os.Path = os.pwd / "base-officielle-codes-postaux.csv"
// ---------------------------------------------------------------------------------------------------------------------
/** A latitude/longitude pair, as produced by `stringToGPS`.
  *
  * @param latitude  first coordinate of the pair
  * @param longitude second coordinate of the pair
  */
case class Point(
  latitude: Double,
  longitude: Double
)
/** One row of the postal code dataset.
  *
  * @param townCode          town identifier; its leading characters give the county code
  * @param townName          main town name
  * @param postalCode        postal code of the row
  * @param secondaryTownName optional secondary town name, absent when the column is blank
  * @param deliveryLabel     optional delivery label, absent when the column is blank
  * @param gps               optional position, absent when the column is missing or unparseable
  */
case class PostalCode(
  townCode: String,
  townName: String,
  postalCode: String,
  secondaryTownName: Option[String],
  deliveryLabel: Option[String],
  gps: Option[Point]
) {

  /** County prefix of `townCode`: the first three characters when the code starts with "97",
    * the first two characters otherwise.
    */
  // NOTE(review): only the "97" prefix gets a three-character county code; confirm whether
  // other prefixes (e.g. "98") should be treated the same way.
  val countyCode: String = townCode.take(if (townCode.startsWith("97")) 3 else 2)
}
/** Parses a `"latitude,longitude"` string.
  *
  * Both parts are trimmed before being parsed.
  *
  * @param input comma-separated coordinate pair
  * @return `Some(Point)` when the input holds exactly two comma-separated parts that both
  *         parse as `Double`; `None` in every other case
  */
def stringToGPS(input: String): Option[Point] =
  input.split(",").map(_.trim) match {
    case Array(latitude, longitude) =>
      for {
        lat <- latitude.toDoubleOption
        lon <- longitude.toDoubleOption
      } yield Point(lat, lon)
    case _ => None
  }
/** Parses one CSV row into a [[PostalCode]].
  *
  * Two layouts are accepted: rows already separated by `;`, and rows made of quoted values
  * separated by `,`, which are first rewritten to the `;` form. Rows with four, five or six
  * fields are supported; shorter rows occur because `String.split` drops trailing empty fields.
  *
  * @param input a single raw line of the dataset
  * @return the parsed row, or `None` (after printing a diagnostic line) when the number of
  *         fields is not supported
  */
def stringToPostalCode(input: String): Option[PostalCode] = {
  // Blank columns become None; non-blank values are kept as they are (not trimmed).
  def nonBlank(value: String): Option[String] = Some(value).filter(_.trim.nonEmpty)

  val fields =
    input.trim // with some basic hack parsing to support both format, opendata and laposte ones
      .replaceAll("\",\"", ";")
      .replaceAll("^\"(.*)\"$", "$1")
      .replaceAll("\",,\"", ";;")
      .replaceAll("\",,", ";;")
      .split(";")

  fields match {
    // An empty secondary town name needs no dedicated case: nonBlank already maps it to None.
    case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel, position) =>
      Some(PostalCode(townCode, townName, postalCode, nonBlank(secondaryTownName), nonBlank(deliveryLabel), stringToGPS(position)))
    case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel) =>
      Some(PostalCode(townCode, townName, postalCode, nonBlank(secondaryTownName), nonBlank(deliveryLabel), None))
    case Array(townCode, townName, postalCode, secondaryTownName) =>
      Some(PostalCode(townCode, townName, postalCode, nonBlank(secondaryTownName), None, None))
    case data =>
      // Report the row and skip it, so that one malformed line does not abort the whole load.
      println("Unmanaged input : " + data.mkString(";"))
      None
  }
}
/** Every row of the dataset that `stringToPostalCode` could parse.
  *
  * The CSV text is read from `cachedResponseFile` when that file exists. Otherwise it is
  * downloaded from `openDataPostalCodesDataSourceURI` and written to `cachedResponseFile`
  * before being parsed.
  */
val postalCodes: Vector[PostalCode] = {
  val response: String =
    if (os.exists(cachedResponseFile)) os.read(cachedResponseFile)
    else {
      val data = requests.get(openDataPostalCodesDataSourceURI).text()
      os.write(cachedResponseFile, data)
      data // the freshly downloaded text is parsed directly, no re-read of the cache
    }
  response
    .split("\n").toVector
    .drop(1) // first line == the CSV labels
    .flatMap(stringToPostalCode) // unparseable rows are dropped
}
// Town names grouped by county code. There is one entry per dataset row, so the same town
// name appears once for every row that mentions it.
val townByCounty = postalCodes.toList.groupMap(_.countyCode)(_.townName)

// Rows whose town name has the most / the fewest letters (non-letter characters are ignored).
// Both are None when no row was loaded.
val longestTownName = postalCodes.maxByOption(_.townName.count(_.isLetter))
val shortestTownName = postalCodes.minByOption(_.townName.count(_.isLetter))

// County code with the largest number of entries in townByCounty, paired with that number.
// NOTE(review): this counts dataset rows, not distinct towns — confirm that is intended.
val countyWithMostTowns =
  townByCounty
    .maxByOption { case (_, towns) => towns.size }
    .map { case (countyCode, towns) => countyCode -> towns.size }

// Print the summary; stripMargin removes the leading '|' alignment markers.
println(
  s"""postalCodeCount : ${postalCodes.size}
     |longestTownName : ${longestTownName}
     |shortestTownName : ${shortestTownName}
     |countyWithMostTowns : ${countyWithMostTowns}""".stripMargin
)
