Last active
February 24, 2022 08:40
-
-
Save johnjohndoe/10466267 to your computer and use it in GitHub Desktop.
Gedenktafeln in Berlin, http://daten.berlin.de/datensaetze/liste-der-gedenktafeln-berlin, STILL BUGGY DATA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Requires pyshp: https://pypi.python.org/pypi/pyshp
#
# Conversion for http://daten.berlin.de/datensaetze/liste-der-gedenktafeln-berlin
# File: http://gedenktafeln-in-berlin.de/index.php?id=31&type=123
#
import os
from datetime import datetime
from xml.etree import ElementTree

import shapefile
def get_value(list, index, default):
    """Return the text of the element at ``index``, or ``default``.

    Args:
        list: Sequence of XML elements (objects with a ``text`` attribute).
            Individual entries may also be ``None``.
        index: Position of the wanted element in ``list``.
        default: Value returned when the position does not exist, the entry
            is ``None``, or the element carries no text.

    Returns:
        The element text as a native ``str``, or ``default`` unchanged.
    """
    # NOTE: the parameter name shadows the builtin ``list``; it is kept so
    # that the function signature stays unchanged for existing callers.
    try:
        element = list[index]
    except IndexError:
        # The record has fewer children than expected.
        return default
    if element is None or element.text is None:
        return default
    text = element.text
    # Encode only when the text is not already a native str (i.e. a
    # Python 2 ``unicode`` object). On Python 3 the text is left untouched
    # instead of being turned into ``bytes``, which later string handling
    # such as ``datetime.strptime`` cannot process.
    if not isinstance(text, str):
        text = text.encode("utf-8")
    return text
def add_shape(writer, attributes):
    """Append one entry (point geometry plus attribute record) to ``writer``.

    Args:
        writer: Shapefile writer that receives the point and the record.
        attributes: Child elements of one XML entry, read by position in the
            order uid, url, tstamp, ortsteil, strasse, longitude, latitude,
            Name, inhalt, erlauterung, swo, literatur, personen, entfernt.

    Entries that lack either coordinate are skipped entirely, so that the
    number of geometries and the number of attribute records stay equal.

    Raises:
        ValueError: If a numeric field or the timestamp cannot be parsed.
    """
    uid = int(get_value(attributes, 0, 0))
    url = get_value(attributes, 1, "")
    tstamp = get_value(attributes, 2, None)
    if tstamp is not None:
        # Timestamps are expected in day.month.year form.
        tstamp = datetime.strptime(tstamp, '%d.%m.%Y')
    ortsteil = get_value(attributes, 3, "")
    strasse = get_value(attributes, 4, "")
    longitude = get_value(attributes, 5, None)
    latitude = get_value(attributes, 6, None)
    name = get_value(attributes, 7, "")
    inhalt = get_value(attributes, 8, "")
    erlauterung = get_value(attributes, 9, "")
    swo = get_value(attributes, 10, "")
    literatur = get_value(attributes, 11, "")
    personen = get_value(attributes, 12, "")
    entfernt = int(get_value(attributes, 13, 0))

    # Both coordinates are required; converting a missing one with float()
    # would raise a TypeError.
    if longitude is None or latitude is None:
        return

    longitude = float(longitude)
    latitude = float(latitude)
    # Fix interchanged coordinates: the data set covers Berlin, where the
    # longitude is smaller than the latitude, so a larger "longitude"
    # means the two values were swapped in the source data.
    if longitude > latitude:
        longitude, latitude = latitude, longitude

    # Geometry and attributes are always written as a pair.
    writer.point(longitude, latitude)
    writer.record(uid, url, tstamp, ortsteil, strasse, name, inhalt,
                  erlauterung, swo, literatur, personen, entfernt)
# Input XML, output shapefile and the WGS 84 definition written to the
# accompanying .prj file.
xml_file = 'gedenktafeln.xml'
shape_file = 'gedenktafeln.shp'
projection = 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]'

tree = ElementTree.parse(xml_file)

# NOTE(review): Writer(shapeType) combined with save() below is the
# pyshp 1.x calling convention - confirm the installed pyshp version.
writer = shapefile.Writer(shapefile.POINT)
writer.field('uid', fieldType='N', size=5, decimal=0)
writer.field('url', fieldType='C', size=255)
# Type 'D' seems to be not working here, so the timestamp is stored as text.
writer.field('tstamp', fieldType='C', size=19)
writer.field('ortsteil', fieldType='C', size=200)
writer.field('strasse', fieldType='C', size=200)
writer.field('Name', fieldType='C', size=255)
writer.field('inhalt', fieldType='C', size=255)
writer.field('erlauterung', fieldType='C', size=255)
writer.field('swo', fieldType='C', size=255)
writer.field('literatur', fieldType='C', size=255)
writer.field('personen', fieldType='C', size=255)
writer.field('entfernt', fieldType='N', size=1, decimal=0)

# Every child of the root element is one entry; its children are the
# positional attributes consumed by add_shape(). Iterating the elements
# directly replaces the deprecated Element.getchildren().
root = tree.getroot()
for shape in root:
    add_shape(writer, list(shape))

try:
    writer.save(shape_file)
except Exception as e:
    # The per-entry values are locals of add_shape() and are not available
    # here, so only the failure itself is reported before re-raising.
    print("Failed to save " + shape_file + ": " + repr(e))
    raise

# create the PRJ file
with open(os.path.splitext(shape_file)[0] + os.extsep + 'prj', 'w') as prj:
    prj.write(projection)
Remove this line: https://gist.github.com/johnjohndoe/10466267#file-gedenktafeln-berlin-py-L25 to make the encoding work
I would also suggest running pep8 over your script to fix the code style. Some lines are too long and there are too many empty lines between functions :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
These variables are not global and therefore not defined: https://gist.github.com/johnjohndoe/10466267#file-gedenktafeln-berlin-py-L99-L107