Skip to content

Instantly share code, notes, and snippets.

@jboynyc
Created April 4, 2016 10:52
Show Gist options
  • Save jboynyc/6ee8df47f6db9126d3b94d80a493b428 to your computer and use it in GitHub Desktop.
Save jboynyc/6ee8df47f6db9126d3b94d80a493b428 to your computer and use it in GitHub Desktop.
Find Dutch blackface parades
import re
import requests
import pandas as pd
from xml.dom.minidom import parseString as xml_parse
# Web page that is downloaded and searched for an embedded KML document.
MAP_URL = "http://sintenpietengilde.nl/Kaart.html"
# Output file for the pretty-printed KML.
KML_OUT = "sintenpietengilde_kaart.kml"
# Output file for the Excel spreadsheet.
XLS_OUT = "sintenpietengilde_kaart.xlsx"
## helper functions
def el_to_list(tree, tag):
    """Return the value of the first child node of every ``tag`` element.

    Args:
        tree: A DOM node exposing ``getElementsByTagName`` (for example a
            parsed ``xml.dom.minidom`` document).
        tag: Element name to look up.

    Returns:
        A list with one entry per matching element, in the order returned
        by ``getElementsByTagName``. Each entry is the ``nodeValue`` of the
        element's first child; an element with no children contributes
        ``''`` so the result always has one entry per element.
    """
    return [
        # An empty element (<tag/>) has no first child to read a value from.
        node.firstChild.nodeValue if node.firstChild is not None else ''
        for node in tree.getElementsByTagName(tag)
    ]
# One or more digits, a space, then the month name "November" or "December".
_DATE_RE = re.compile(r'(\d+ (?:November|December))')


def get_date(s):
    """Extract the first "<day> November" / "<day> December" date from ``s``.

    Args:
        s: Text to search.

    Returns:
        The first matching date substring (e.g. ``'14 November'``). If
        nothing matches, or ``s`` is not a string, ``s`` is returned
        unchanged.
    """
    try:
        match = _DATE_RE.search(s)
    except TypeError:
        # Not a string (e.g. None or a missing value): pass it through.
        return s
    return match.group(1) if match else s
# Text following an opening parenthesis, ended by "<br/>" or ")".
_ORG_RE = re.compile(r'\((.*)(?:<br/>|\))')


def get_org(s):
    """Extract the parenthesised text from ``s``.

    Args:
        s: Text to search.

    Returns:
        The text captured after the first ``(`` that can be matched, up to
        a terminating ``<br/>`` or ``)``. If nothing matches, or ``s`` is
        not a string, ``s`` is returned unchanged.
    """
    # NOTE(review): ``.*`` is greedy, so when several terminators follow the
    # opening parenthesis the capture runs to the last one -- confirm that
    # is the intended result for descriptions with multiple parentheses.
    try:
        # Search the argument itself; the earlier code referenced an
        # undefined name here, so every call fell through to ``return s``.
        match = _ORG_RE.search(s)
    except TypeError:
        # Not a string (e.g. None or a missing value): pass it through.
        return s
    return match.group(1) if match else s
## get data
# Bound the wait and reject HTTP error responses, so a hung server or an
# error page is reported here instead of as a confusing parse failure later.
response = requests.get(MAP_URL, timeout=30)
response.raise_for_status()
src = response.text
# '.' does not match newlines, so this only finds a KML document whose
# opening and closing tags are on the same line of the page source.
kml_extractor = re.compile(r'<kml .*</kml>')
kml_match = kml_extractor.search(src)
if kml_match is None:
    raise ValueError('no <kml> document found in ' + MAP_URL)
kml = kml_match.group(0)
# Parse the extracted text into a DOM tree for the steps below.
t = xml_parse(kml)
## create spreadsheet
# One column per KML tag, in this order; 'name' then becomes the index.
kml_tags = ('name', 'description', 'coordinates')
d = pd.DataFrame({tag: el_to_list(t, tag) for tag in kml_tags})
d = d.set_index('name')
# Both derived columns are computed element-wise from the description text.
descriptions = d['description']
d['date'] = descriptions.map(get_date)
d['org'] = descriptions.map(get_org)
## output
# toprettyxml() emits an XML declaration without an encoding, which readers
# interpret as UTF-8, so write UTF-8 explicitly rather than relying on the
# platform's default encoding.
with open(KML_OUT, 'w', encoding='utf-8') as f:
    f.write(t.toprettyxml())
# Drop the raw description column and write the rest to the 'intochten'
# sheet. `axis` and `sheet_name` are passed by keyword because current
# pandas releases no longer accept them positionally.
d.drop('description', axis=1).to_excel(XLS_OUT, sheet_name='intochten')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment