Last active
June 21, 2024 21:09
-
-
Save dacr/0943af77e448644b5cf6a4917b04df26 to your computer and use it in GitHub Desktop.
postal code opendata data sources. / published by https://github.com/dacr/code-examples-manager #095ffa72-b4d9-4f3d-85b2-b3e69a302ac4/f90b5950c7d494c673b482f71551350c45248753
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// summary : postal code opendata data sources.
// keywords : scala, opendata, data-analysis, requests, postal-codes, @testable
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2)
// id : 095ffa72-b4d9-4f3d-85b2-b3e69a302ac4
// created-on : 2020-10-10T16:21:18Z
// managed-by : https://github.com/dacr/code-examples-manager
// run-with : scala-cli $file
// ---------------------
//> using scala 3.4.2
//> using dep com.lihaoyi::requests:0.8.3
//> using dep com.lihaoyi::os-lib:0.10.2
// ---------------------
// Home page of the postal codes dataset on data.gouv.fr
val openDataPostalCodesHome: String =
  "https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/"

// Alternative download location, currently disabled:
//val openDataPostalCodesDataSourceURI = "https://www.data.gouv.fr/fr/datasets/r/3062548d-f510-4ded-ba38-a64126a5331b"

// CSV file fetched over HTTP when no local copy is available
val openDataPostalCodesDataSourceURI: String =
  "https://datanova.laposte.fr/data-fair/api/v1/datasets/laposte-hexasmal/metadata-attachments/base-officielle-codes-postaux.csv"

// Local copy of the CSV file, located in the current working directory
val cachedResponseFile: os.Path = os.pwd / "base-officielle-codes-postaux.csv"
// --------------------------------------------------------------------------------------------------------------------- | |
/** A geographic position.
  *
  * @param latitude  first coordinate of the position
  * @param longitude second coordinate of the position
  */
case class Point(latitude: Double, longitude: Double)
/** One record of the postal codes dataset.
  *
  * @param townCode          identifier of the town; its first characters give the [[countyCode]]
  * @param townName          name of the town
  * @param postalCode        postal code attached to the town
  * @param secondaryTownName optional secondary name, absent when the column is blank
  * @param deliveryLabel     optional delivery label, absent when the column is blank
  * @param gps               optional position, absent when the column is missing or unparsable
  */
case class PostalCode(
  townCode: String,
  townName: String,
  postalCode: String,
  secondaryTownName: Option[String],
  deliveryLabel: Option[String],
  gps: Option[Point]
) {

  /** Code of the county (or overseas territory) the town belongs to.
    *
    * Codes starting with "97" or "98" identify overseas territories through their first three
    * characters (971, 974, 987, 988, ...); every other code uses its first two characters,
    * which also covers the Corsican "2A" / "2B" prefixes.
    */
  val countyCode: String = {
    val isOverseas = townCode.startsWith("97") || townCode.startsWith("98")
    townCode.take(if (isOverseas) 3 else 2)
  }
}
/** Parses a "latitude,longitude" text into a [[Point]].
  *
  * The input is split on commas and each part is trimmed.
  *
  * @param input text holding the two coordinates
  * @return the position, or None when the split does not yield exactly two parts
  *         or when one of them is not a valid Double
  */
def stringToGPS(input: String): Option[Point] = {
  val coordinates = input.split(",").map(_.trim)
  if (coordinates.length == 2)
    coordinates(0).toDoubleOption.flatMap { lat =>
      coordinates(1).toDoubleOption.map(lon => Point(lat, lon))
    }
  else None
}
/** Parses one CSV data row into a [[PostalCode]].
  *
  * Two layouts are supported: semicolon separated columns, and double-quoted comma separated
  * columns (rewritten into the semicolon layout before splitting).
  * Column order: townCode, townName, postalCode, secondaryTownName, deliveryLabel, position.
  * The first three columns are required; the remaining ones may be blank or missing.
  *
  * @param input a single data line (the CSV header line must not be given)
  * @return the parsed record, or None - after printing the rejected row - when the row
  *         holds fewer than three or more than six columns
  */
def stringToPostalCode(input: String): Option[PostalCode] = {
  // A blank column means "no value"
  def optional(field: String): Option[String] = Some(field).filter(_.trim.nonEmpty)

  // Basic hack parsing to support both formats, the opendata one and the laposte one.
  // String.split drops trailing empty columns, they are added back with padTo below.
  // When a quoted row ends with empty columns the outer quotes are not removed by the
  // second replaceAll, so leftover quotes are stripped field by field.
  val fields =
    input.trim
      .replaceAll("\",\"", ";")
      .replaceAll("^\"(.*)\"$", "$1")
      .replaceAll("\",,\"", ";;")
      .replaceAll("\",,", ";;")
      .split(";")
      .map(_.stripPrefix("\"").stripSuffix("\""))

  fields.padTo(6, "") match {
    case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel, position) if fields.length >= 3 =>
      Some(
        PostalCode(
          townCode,
          townName,
          postalCode,
          optional(secondaryTownName),
          optional(deliveryLabel),
          stringToGPS(position) // a blank position is not parsable and yields None
        )
      )
    case _ =>
      println("Unmanaged input : " + fields.mkString(";"))
      None
  }
}
// Every parsable record of the dataset: the local copy is used when it exists,
// otherwise the file is downloaded and saved locally before being parsed.
val postalCodes = {
  def readCachedLines() = os.read(cachedResponseFile).split("\n").toVector

  def downloadAndCacheLines() = {
    val httpResponse = requests.get(openDataPostalCodesDataSourceURI)
    os.write(cachedResponseFile, httpResponse)
    httpResponse.lines()
  }

  val csvLines =
    if (os.exists(cachedResponseFile)) readCachedLines()
    else downloadAndCacheLines()

  // first line == the CSV labels, rows rejected by the parser are skipped
  csvLines
    .drop(1)
    .flatMap(line => stringToPostalCode(line))
}
// Town names grouped by county code.
// The dataset may hold several rows for the same town (one per postal code / delivery label),
// so rows are de-duplicated on the town code to count each town only once.
val townByCounty = postalCodes.distinctBy(_.townCode).toList.groupMap(_.countyCode)(_.townName)

// Records whose town name has the most / the fewest letters (None when there is no record)
val longestTownName = postalCodes.maxByOption(_.townName.count(_.isLetter))
val shortestTownName = postalCodes.minByOption(_.townName.count(_.isLetter))

// County code with the highest number of distinct towns, paired with that number
val countyWithMostTowns =
  townByCounty
    .maxByOption { case (_, towns) => towns.size }
    .map { case (countyCode, towns) => countyCode -> towns.size }

// Dump every record, ordered by town code
postalCodes
  .sortBy(_.townCode)
  .foreach(println)

// Summary report
println(
  s"""postalCodeCount : ${postalCodes.size}
     |longestTownName : ${longestTownName}
     |shortestTownName : ${shortestTownName}
     |countyWithMostTowns : ${countyWithMostTowns}
     |""".stripMargin
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment