Skip to content

Instantly share code, notes, and snippets.

@d3d9 d3d9/hagen.ini
Last active Mar 27, 2018

Embed
What would you like to do?
Hausnummernvergleich zwischen amtlichen Daten der Stadt Hagen und Daten von OpenStreetMap.
[Sources]
AmtUrl = http://www.stadtplan.hagen.de/StrVz/Hauskoordinaten.csv
AmtSep = ;
AmtEnc = cp1252
AmtCRS = EPSG:4647
OsmUrl = http://overpass-api.de/api/interpreter?data=%5Bout%3Acsv%28%3A%3Alat%2C%3A%3Alon%2C%3A%3Aid%2C%3A%3Atype%2C%22addr%3Apostcode%22%2C%22addr%3Astreet%22%2C%22addr%3Ahousenumber%22%2C%22addr%3Aplace%22%3Btrue%3B%22%3B%22%29%5D%5Btimeout%3A200%5D%3B%0Aarea%283601800297%29-%3E.searchArea%3B%0A%28%0A%20%20node%5B%22addr%3Ahousenumber%22%5D%28area.searchArea%29%3B%0A%20%20way%5B%22addr%3Ahousenumber%22%5D%28area.searchArea%29%3B%0A%29%3B%0Aout%20center%3B
;OsmPlace = Hagen
OsmSep = ;
OsmEnc = utf-8
OsmCRS = EPSG:4326
[Columns]
AmtPLZ = Postleitzahl
AmtStr = Straßenname
AmtNr = Hausnummer
AmtZus = Zusatz
AmtX = Rechtswert
AmtY = Hochwert
[Data]
Replacements = (("Dr.-Lammert-Weg","Doktor-Lammert-Weg"),
("Joh.-Gottlieb-Fichte-Str.","Johann-Gottlieb-Fichte-Straße"),
("Johann-F.-Oberlin-Str.","Johann-Friedrich-Oberlin-Straße"),
("Prof.-Dr.-Schemann-Weg","Professor-Doktor-Schemann-Weg"))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas
import csv
import argparse
import configparser
from ast import literal_eval
from urllib.request import urlopen
from datetime import datetime
from pyproj import Proj, transform
from scipy.spatial import distance
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--config', action='store', help='Config file name', type=str, required=True)
args = parser.parse_args()
config = configparser.ConfigParser(interpolation=None)
config.read(args.config)
'''
for section in config.sections():
print(section)
for c in config[section]:
print(c,config[section][c])
'''
'''
Query:
[out:csv(::lat,::lon,::id,::type,"addr:postcode","addr:street","addr:housenumber","addr:place";true;";")][timeout:200];
{{geocodeArea:"????????"}}->.searchArea;
(
node["addr:housenumber"](area.searchArea);
way["addr:housenumber"](area.searchArea);
);
out center;
'''
print("Hausnummern werden geladen ...")
r_amt = urlopen(config["Sources"]["AmtUrl"])
r_osm = urlopen(config["Sources"]["OsmUrl"])
amt = pandas.read_csv(r_amt, sep=config["Sources"]["AmtSep"], encoding=config["Sources"]["AmtEnc"], dtype=str)
osm = pandas.read_csv(r_osm, sep=config["Sources"]["OsmSep"], encoding=config["Sources"]["OsmEnc"], dtype=str)
print("Vergleich wird durchgeführt ...")
amtStr = config["Columns"]["AmtStr"]
amtNr = config["Columns"]["AmtNr"]
# todo: Manche Quellen haben Hausnummer und Zusatz möglicherweise in einem Feld
amtZus = config["Columns"]["AmtZus"]
replacements = literal_eval(config["Data"]["Replacements"])
osmProj = Proj(init=config["Sources"]["OsmCRS"])
amtProj = Proj(init=config["Sources"]["AmtCRS"])
kombinierte = []
for index, row in amt.iterrows():
strasse = row[amtStr]
for x in replacements:
strasse = strasse.replace(x[0], x[1])
adresse = strasse + ' ' + row[amtNr]
zusatz = row[amtZus]
if(pandas.notnull(zusatz)):
adresse += zusatz
kombinierte.append(adresse)
amt = amt.assign(kombiniert=kombinierte)
kombinierte = []
for index, row in osm.iterrows():
adresse = row["addr:street"] + ' ' + row["addr:housenumber"]
kombinierte.append(adresse)
osm = osm.assign(kombiniert=kombinierte)
merged = pandas.merge(amt, osm, on='kombiniert', how="outer", indicator=True)
amtX = config["Columns"]["AmtX"]
amtY = config["Columns"]["AmtY"]
amtPLZ = config["Columns"]["AmtPLZ"]
treffer = []
zuwenig = []
zuviel = []
for index, row in merged.iterrows():
if row['_merge'] == "both":
zusatz = row[amtZus]
if pandas.isnull(zusatz):
zusatz = ""
# todo: wie will man mit duplicates umgehen?
diff = round(distance.euclidean((float(row[amtX].replace(",", ".")),
float(row[amtY].replace(",", "."))),
transform(osmProj, amtProj, row["@lon"], row["@lat"])), 2)
treffer.append([row[amtX], row[amtY], row["@lat"], row["@lon"], diff, row["@type"][0]+row["@id"], row[amtPLZ], row[amtStr], row[amtNr], zusatz])
elif row['_merge'] == "left_only":
zusatz = row[amtZus]
if pandas.isnull(zusatz):
zusatz = ""
zuwenig.append([row[amtX], row[amtY], row[amtPLZ], row[amtStr], row[amtNr], zusatz])
elif row['_merge'] == "right_only":
zuviel.append([row["@lat"], row["@lon"], row["@type"][0]+row["@id"], row["addr:postcode"], row["addr:street"], row["addr:housenumber"]])
day = datetime.now().strftime("%d%m%y")
trefferfn = "auswertung" + day + "-treffer.csv"
with open(trefferfn, 'w', newline='') as f:
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL)
writer.writerow([amtX, amtY, "lat_osm", "lon_osm", "distance", "full_id", amtPLZ, amtStr, amtNr, amtZus])
writer.writerows(treffer)
print("Datei " + trefferfn + " geschrieben")
zuwenigfn = "auswertung" + day + "-zuwenig.csv"
with open(zuwenigfn, 'w', newline='') as f:
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL)
writer.writerow([amtX, amtY, amtPLZ, amtStr, amtNr, amtZus])
writer.writerows(zuwenig)
print("Datei " + zuwenigfn + " geschrieben")
zuvielfn = "auswertung" + day + "-zuviel.csv"
with open(zuvielfn, 'w', newline='') as f:
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL)
writer.writerow(["@lat", "@lon", "full_id", "addr:postcode", "addr:street", "addr:housenumber"])
writer.writerows(zuviel)
print("Datei " + zuvielfn + " geschrieben")
@grischard

This comment has been minimized.

Copy link

commented Feb 19, 2018

Für meine csv-Verbesserungen und Ergänzungen hab ich https://github.com/grischard/csventrifuge gebaut. Dein Code scheint aber viel eleganter zu sein!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.