Skip to content

Instantly share code, notes, and snippets.

@mromanello
Created March 15, 2013 08:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mromanello/5168303 to your computer and use it in GitHub Desktop.
Save mromanello/5168303 to your computer and use it in GitHub Desktop.
Python script to create an example of data that can be displayed in the DARIAH geo-browser <http://dev2.dariah.eu/e4d/>, a tool to visualize data in time and space. Data are drawn from the Gazetteer and the Opac (library catalogue) of the German Archaeological Institute (DAI). This dataset contains data about publications related to the Limes grouped by the place in the Limes they relate to.
import json
import urllib
import urllib2
import sys
import codecs
"""
Author: Matteo Romanello, <mro@dainst.de>
"""
def to_KML(place_name, coord, date, desc):
    """
    Render a single KML ``<Placemark>`` element and return it as a string.

    place_name -- text for the ``<name>`` element; XML special characters
                  (``&``, ``<``, ``>``) are escaped.
    coord      -- indexable pair; ``coord[0]`` and ``coord[1]`` are written,
                  in that order, into ``<coordinates>``.
    date       -- value used for both ``<begin>`` and ``<end>`` of the
                  ``<TimeSpan>``; XML special characters are escaped.
    desc       -- markup placed inside a CDATA section of ``<description>``.
    """
    # Imported locally so this function does not rely on a new file-level import.
    from xml.sax.saxutils import escape

    # A literal "]]>" inside the description would close the CDATA section
    # early; splitting it over two adjacent CDATA sections keeps the document
    # well-formed while the parsed text stays the same.
    safe_desc = ("%s" % (desc,)).replace("]]>", "]]]]><![CDATA[>")
    safe_name = escape("%s" % (place_name,))
    safe_date = escape("%s" % (date,))

    template = """<Placemark>
<name>%s</name>
<Point>
<coordinates>%s,%s</coordinates>
</Point>
<TimeSpan>
<begin>%s</begin>
<end>%s</end>
</TimeSpan>
<description><![CDATA[
%s
]]></description>
</Placemark>"""
    return template % (safe_name, coord[0], coord[1], safe_date, safe_date, safe_desc)
def get_related_publications(zenon_url = "http://highgrass.uni-koeln.de/elwms-zenon/search", key = None):
    """
    Fetch publications from Zenon, DAI's opac, by querying its RESTful API
    with `key` as search key.

    In this context the search key is the subject entry in the thesaurus
    corresponding to a given place in the Limes.

    zenon_url -- base URL of the search endpoint.
    key       -- value searched for in field ``f999_1``; when ``None`` no
                 request is made.

    Returns a list of dicts with the keys ``id``, ``title``, ``author``,
    ``date``, ``zenon_id`` and ``link`` (empty when `key` is ``None`` or the
    response carries no ``data`` entries).
    """
    if key is None:
        return []

    # Imported locally so this function does not rely on a new file-level import.
    from contextlib import closing

    zenon_params = {'q': 'f999_1:%s' % key, 'limit': 1000}
    zenon_full_url = "%s?%s" % (zenon_url, urllib.urlencode(zenon_params))
    # closing() guarantees the HTTP response is released even if JSON parsing fails.
    with closing(urllib2.urlopen(zenon_full_url)) as zenon_data:
        js_publications = json.load(zenon_data)

    result = []
    # A missing or null 'data' member is treated as "no publications".
    for pub in js_publications.get('data') or []:
        temp = {}
        temp['id'] = pub["id"]
        temp['title'] = pub["title"]
        temp['author'] = pub["author"]
        # here we need to have some more sophisticated normalization
        temp['date'] = pub['imprint'].replace('.', '')
        temp['zenon_id'] = pub['id']
        temp['link'] = "http://testopac.dainst.org/&#35;book/%s" % temp['zenon_id']
        result.append(temp)
    return result
def get_places(url = "http://gazetteer.dainst.org/search.json"):
    """
    Retrieve geo-data for Limes locations from DAI's gazetteer using its
    RESTful API.

    url -- search endpoint; it is queried with ``q=tags:limes``.

    Returns a list of dicts, one per place that has a ``prefLocation``, with
    the keys ``coord``, ``name``, ``id``, ``subject_headings`` (values of the
    identifiers whose context is ``zenon-thesaurus``) and ``gazlink``.
    Places without ``prefLocation`` are reported on stderr and skipped.
    """
    # Imported locally so this function does not rely on a new file-level import.
    from contextlib import closing

    # limit param is very important here to get the full results
    params = {'limit': 1000, 'q': 'tags:limes'}
    full_url = "%s?%s" % (url, urllib.urlencode(params))
    # closing() guarantees the HTTP response is released even if JSON parsing fails.
    with closing(urllib2.urlopen(full_url)) as data:
        js_places = json.load(data)

    result = []
    for pl in js_places['result']:
        if 'prefLocation' not in pl:
            # Report the record being skipped; the dict built for a previously
            # accepted place must not be consulted here.
            print >> sys.stderr, "Skipped %s as it has no coordinates" % pl.get('@id')
            continue
        temp = {}
        temp['coord'] = pl['prefLocation']['coordinates']
        temp['name'] = pl['prefName']['title']  # there could be multiple names here in diff languages
        temp['id'] = pl['@id']
        # A place without an 'identifiers' list simply has no subject headings.
        temp['subject_headings'] = [
            ident['value']
            for ident in pl.get('identifiers', [])
            if ident['context'] == u'zenon-thesaurus'
        ]
        temp['gazlink'] = temp["id"].replace("#", "&#35;")
        result.append(temp)
    return result
def main(ofname="output.kml"):
    """
    Build the KML dataset and write it, UTF-8 encoded, to `ofname`.

    Places come from get_places(); for every subject heading of every place
    the related publications are fetched with get_related_publications(), and
    each publication whose date string is at most 4 characters long becomes
    one placemark (see to_KML()) located at that place.

    ofname -- path of the output file (overwritten if it exists).
    """
    places = get_places()
    metadata = """
<ExtendedData xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:description>
This dataset contains data about publications related to the Limes grouped by the place in the Limes they relate to.
The main purpose is the display within the DARIAH Geo-browser, a tool to visualize data in time and space.
</dc:description>
<dc:subject>Archaeology, geo-date, Limes</dc:subject>
<dc:identifier>http://de.digitalclassicist.org/berlin/files/output.kml</dc:identifier>
<dc:language>de</dc:language>
<dc:date>2013-03-15</dc:date>
<dc:rights>Copyright 2000, O'Reilly Network</dc:rights>
<dc:publisher>German Archaeological Institute</dc:publisher>
</ExtendedData>
"""
    intro = """<?xml version="1.0" encoding="utf-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
<Document>%s
"""%metadata
    outro = """
</Document>
</kml>"""
    # The context manager closes the file even when a request or write fails.
    with codecs.open(ofname, 'w', 'utf-8') as out:
        out.write(intro)
        for place in places:
            for key in place["subject_headings"]:
                found = get_related_publications(key = key.encode('utf-8'))
                if not found:
                    continue
                print >> sys.stderr,"Found %i related publications in Zenon related to %s"%(len(found),place['name'])
                for pub in found:
                    # Only short date strings are exported -- presumably bare
                    # years; longer imprint values are not normalized yet.
                    if len(pub['date']) > 4:
                        continue
                    desc = "<p>%s <em>%s</em> (%s) [<a target=\"_blank\" href='%s'>View in Zenon</a> ; <a href='%s' target=\"_blank\">View in Gazetteer</a> ] </p>"%(pub['author'],pub['title'],pub['date'],pub['link'],place['gazlink'])
                    out.write(to_KML(place['name'],place['coord'],pub['date'],desc))
        out.write(outro)
if __name__ == "__main__":
    # Running the script writes the dataset to the default "output.kml".
    # To send the output elsewhere, pass a file name instead, e.g.
    # main(ofname="thefilenameidecide.kml").
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment