Skip to content

Instantly share code, notes, and snippets.

@d3d9
Last active November 8, 2020 22:46
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save d3d9/263abc2b1b44bc62469ff352d5690619 to your computer and use it in GitHub Desktop.
Save d3d9/263abc2b1b44bc62469ff352d5690619 to your computer and use it in GitHub Desktop.
Hausnummernvergleich zwischen amtlichen Daten der Stadt Hagen und Daten von OpenStreetMap.
[Sources]
AmtUrl = http://www.stadtplan.hagen.de/StrVz/Hauskoordinaten.csv
AmtSep = ;
AmtEnc = cp1252
AmtCRS = EPSG:4647
OsmUrl = http://overpass-api.de/api/interpreter?data=%5Bout%3Acsv%28%3A%3Alat%2C%3A%3Alon%2C%3A%3Aid%2C%3A%3Atype%2C%22addr%3Apostcode%22%2C%22addr%3Astreet%22%2C%22addr%3Ahousenumber%22%2C%22addr%3Aplace%22%3Btrue%3B%22%3B%22%29%5D%5Btimeout%3A200%5D%3B%0Aarea%283601800297%29-%3E.searchArea%3B%0A%28%0A%20%20node%5B%22addr%3Ahousenumber%22%5D%28area.searchArea%29%3B%0A%20%20way%5B%22addr%3Ahousenumber%22%5D%28area.searchArea%29%3B%0A%29%3B%0Aout%20center%3B
;OsmPlace = Hagen
OsmSep = ;
OsmEnc = utf-8
OsmCRS = EPSG:4326
[Columns]
AmtPLZ = Postleitzahl
AmtStr = Straßenname
AmtNr = Hausnummer
AmtZus = Zusatz
AmtX = Rechtswert
AmtY = Hochwert
[Data]
Replacements = (("Dr.-Lammert-Weg","Doktor-Lammert-Weg"),
("Joh.-Gottlieb-Fichte-Str.","Johann-Gottlieb-Fichte-Straße"),
("Johann-F.-Oberlin-Str.","Johann-Friedrich-Oberlin-Straße"),
("Prof.-Dr.-Schemann-Weg","Professor-Doktor-Schemann-Weg"))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas
import csv
import argparse
import configparser
from ast import literal_eval
from urllib.request import urlopen
from datetime import datetime
from pyproj import Transformer
from scipy.spatial import distance
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--config', action='store', help='Config file name', type=str, required=True)
args = parser.parse_args()
config = configparser.ConfigParser(interpolation=None)
config.read(args.config)
'''
for section in config.sections():
print(section)
for c in config[section]:
print(c,config[section][c])
'''
'''
Query:
[out:csv(::lat,::lon,::id,::type,"addr:postcode","addr:street","addr:housenumber","addr:place";true;";")][timeout:200];
{{geocodeArea:"????????"}}->.searchArea;
(
node["addr:housenumber"](area.searchArea);
way["addr:housenumber"](area.searchArea);
);
out center;
'''
print("Hausnummern werden geladen ...")
r_amt = urlopen(config["Sources"]["AmtUrl"])
r_osm = urlopen(config["Sources"]["OsmUrl"])
amt = pandas.read_csv(r_amt, sep=config["Sources"]["AmtSep"], encoding=config["Sources"]["AmtEnc"], dtype=str)
osm = pandas.read_csv(r_osm, sep=config["Sources"]["OsmSep"], encoding=config["Sources"]["OsmEnc"], dtype=str)
print("Vergleich wird durchgeführt ...")
amtStr = config["Columns"]["AmtStr"]
amtNr = config["Columns"]["AmtNr"]
# todo: Manche Quellen haben Hausnummer und Zusatz möglicherweise in einem Feld
amtZus = config["Columns"]["AmtZus"]
replacements = literal_eval(config["Data"]["Replacements"])
kombinierte = []
for index, row in amt.iterrows():
strasse = row[amtStr]
for x in replacements:
strasse = strasse.replace(x[0], x[1])
adresse = strasse + ' ' + row[amtNr]
zusatz = row[amtZus]
if(pandas.notnull(zusatz)):
adresse += zusatz
kombinierte.append(adresse)
amt = amt.assign(kombiniert=kombinierte)
kombinierte = []
for index, row in osm.iterrows():
adresse = row["addr:street"] + ' ' + row["addr:housenumber"]
kombinierte.append(adresse)
osm = osm.assign(kombiniert=kombinierte)
merged = pandas.merge(amt, osm, on='kombiniert', how="outer", indicator=True)
amtX = config["Columns"]["AmtX"]
amtY = config["Columns"]["AmtY"]
amtPLZ = config["Columns"]["AmtPLZ"]
transformer = Transformer.from_crs(config["Sources"]["OsmCRS"], config["Sources"]["AmtCRS"])
treffer = []
zuwenig = []
zuviel = []
for index, row in merged.iterrows():
if row['_merge'] == "both":
zusatz = row[amtZus]
if pandas.isnull(zusatz):
zusatz = ""
# todo: wie will man mit duplicates umgehen?
diff = round(distance.euclidean((float(row[amtX].replace(",", ".")),
float(row[amtY].replace(",", "."))),
transformer.transform(row["@lat"], row["@lon"])), 2)
treffer.append([row[amtX], row[amtY], row["@lat"], row["@lon"], diff, row["@type"][0]+row["@id"], row[amtPLZ], row[amtStr], row[amtNr], zusatz])
elif row['_merge'] == "left_only":
zusatz = row[amtZus]
if pandas.isnull(zusatz):
zusatz = ""
zuwenig.append([row[amtX], row[amtY], row[amtPLZ], row[amtStr], row[amtNr], zusatz])
elif row['_merge'] == "right_only":
zuviel.append([row["@lat"], row["@lon"], row["@type"][0]+row["@id"], row["addr:postcode"], row["addr:street"], row["addr:housenumber"]])
day = datetime.now().strftime("%d%m%y")
trefferfn = "auswertung" + day + "-treffer.csv"
with open(trefferfn, 'w', newline='') as f:
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL)
writer.writerow([amtX, amtY, "lat_osm", "lon_osm", "distance", "full_id", amtPLZ, amtStr, amtNr, amtZus])
writer.writerows(treffer)
print("Datei " + trefferfn + " geschrieben")
zuwenigfn = "auswertung" + day + "-zuwenig.csv"
with open(zuwenigfn, 'w', newline='') as f:
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL)
writer.writerow([amtX, amtY, amtPLZ, amtStr, amtNr, amtZus])
writer.writerows(zuwenig)
print("Datei " + zuwenigfn + " geschrieben")
zuvielfn = "auswertung" + day + "-zuviel.csv"
with open(zuvielfn, 'w', newline='') as f:
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL)
writer.writerow(["@lat", "@lon", "full_id", "addr:postcode", "addr:street", "addr:housenumber"])
writer.writerows(zuviel)
print("Datei " + zuvielfn + " geschrieben")
@grischard
Copy link

Für meine csv-Verbesserungen und Ergänzungen hab ich https://github.com/grischard/csventrifuge gebaut. Dein Code scheint aber viel eleganter zu sein!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment