@grischard
Created February 13, 2020 09:27
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Return GeoJSON centroids with population for each locality in Luxembourg.
BD-Adresses doesn't include communes, so we use the OpenStreetMap community's
csventrifuge output, which has been enriched. The output of this script is
© OpenStreetMap Contributors, see https://openstreetmap.org/copyright.
Some localities are spread over several communes (yeah), e.g. Dirbach, which is in three.
- Downloads the latest addresses produced by csventrifuge (enriched by commune)
- Averages the positions of all addresses in each locality
- Conflates the population from the RNPP
- Spits out GeoJSON
Run like:
python3 place-centroid.py > place-centroids.geojson
"""
import pandas as pd
import geojson
def data2geojson(df):
    # Converts our dataframe to geojson. Column names hardcoded.
    features = []
    insert_features = lambda X: features.append(
        geojson.Feature(
            geometry=geojson.Point(
                (round(X["lon_wgs84"], 4), round(X["lat_wgs84"], 4))
            ),
            properties=dict(
                localite=X["localite"], commune=X["commune"], population=X["population"]
            ),
        )
    )
    df.apply(insert_features, axis=1)
    return geojson.dumps(
        geojson.FeatureCollection(features), sort_keys=True, ensure_ascii=False
    )
# The endpoint that redirects to the most recent version of the
# addresses in CSV.
ADDRESSES_CSV = "https://openstreetmap.lu/luxembourg-addresses.csv"
df = pd.read_csv(ADDRESSES_CSV)
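# The enriched CSV is expected to carry at least localite, commune, lat_wgs84
# and lon_wgs84, alongside the CACLR/LUREF columns we drop just below.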
# Drop useless labels
df = df.drop(
    labels=[
        "rue",
        "numero",
        "code_postal",
        "id_caclr_rue",
        "id_caclr_bat",
        "coord_est_luref",
        "coord_nord_luref",
        "id_geoportail",
        "code_commune",
    ],
    axis=1,
)
# Remove commune in parentheses, information is redundant and makes conflation
# more difficult
df = df.replace(to_replace=r" \(.*\)", value="", regex=True)
# People live there, but there's no address in BD-Adresses. Hardcode a point.
missingaddresses = [
["Helperknapp", "Kuelbecherhaff", 49.72131, 6.06763],
["Larochette", "Meysembourg", 49.76963, 6.18718],
["Waldbillig", "Grundhof", 49.81823, 6.32157],
["Wormeldange", "Dreiborn", 49.61974, 6.39383],
]
# DataFrame.append was removed in pandas 2.0; pd.concat does the same job.
df = pd.concat(
    [
        df,
        pd.DataFrame(
            missingaddresses, columns=["commune", "localite", "lat_wgs84", "lon_wgs84"]
        ),
    ]
)
# Group by the mean lat/lon to get centre
df = df.groupby(["commune", "localite"]).mean()
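# Population per locality, conflated from the RNPP data published on data.public.lu.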
POPULATION_CSV = (
    "https://data.public.lu/fr/datasets/r/cd165f0a-feb7-401f-a440-d42695e63738"
)
# cp437 is the encoding of the original IBM PC. Why they use it is anyone's guess.
popdf = pd.read_csv(POPULATION_CSV, encoding="cp437")
# Drop useless labels
popdf = popdf.drop(labels="COMMUNE_CODE", axis=1)
# Remove commune in parentheses, information is redundant and makes conflation
# more difficult
popdf = popdf.replace(to_replace=r" \(.*\)", value="", regex=True)
# Rename columns to match the csventrifuge output
popdf = popdf.rename(
    columns={"COMMUNE": "commune", "LOCALITE": "localite", "POPULATION": "population"}
)
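# A few place names are spelled differently in the two sources; align them here
# so the merge below actually matches (e.g. "Redange/Attert" vs "Redange-sur-Attert").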
popdf["commune"].replace({"Redange/Attert": "Redange-sur-Attert"}, inplace=True)
popdf["localite"].replace(
{
"Redange/Attert": "Redange",
"Roodt/Eisch": "Roodt-sur-Eisch",
"Goebelsmuehle": "Goebelsmühle",
},
inplace=True,
)
# Join both together
richdf = pd.merge(df, popdf, on=["commune", "localite"], how="outer")
# Places with no population get a 0
richdf["population"].fillna(0, inplace=True)
print(data2geojson(richdf))