# Source: mromanello/gist:5168303

## gistfile1.py
import json
import urllib
import urllib2
import sys
import codecs

"""
Author: Matteo Romanello, <mro@dainst.de>
"""

def to_KML(place_name, coord, date, desc):
	"""
	Render a single KML <Placemark> element as a string.

	place_name -- text for the <name> element; XML special characters
	              (&, <, >) are escaped before insertion.
	coord      -- indexable pair; coord[0] and coord[1] are written, in that
	              order, into <coordinates>.
	date       -- value used for both <begin> and <end> of the <TimeSpan>.
	desc       -- markup placed inside the CDATA section of <description>.

	Returns the formatted <Placemark> markup.
	"""
	# Function-scope import so the block does not depend on a file-level import.
	from xml.sax.saxutils import escape

	# A literal "]]>" inside desc would terminate the CDATA section early;
	# splitting it across two adjacent CDATA sections keeps the XML well-formed.
	safe_desc = desc.replace("]]>", "]]]]><![CDATA[>")

	# Written with explicit escapes so the exact whitespace of the emitted
	# markup does not depend on how this source file is indented.
	template = (
		"<Placemark>\n"
		"      <name>%s</name>\n"
		"      <Point>\n"
		"        <coordinates>%s,%s</coordinates>\n"
		"      </Point>\n"
		"      <TimeSpan>\n"
		"        <begin>%s</begin>\n"
		"        <end>%s</end>\n"
		"      </TimeSpan>\n"
		"\t  <description><![CDATA[\n"
		"\t%s\n"
		"\t]]></description>\n"
		"    </Placemark>"
	)
	return template % (escape(place_name), coord[0], coord[1], date, date, safe_desc)

def get_related_publications(zenon_url = "http://highgrass.uni-koeln.de/elwms-zenon/search", key = None):
	"""
	Fetch publications from Zenon, DAI's OPAC, by querying its RESTful API.

	zenon_url -- base URL of the search endpoint.
	key       -- search key, sent as the query `f999_1:<key>`. In this script it
	             is the thesaurus subject entry corresponding to a Limes place.
	             When None, no request is made.

	Returns a list of dicts with the keys 'id', 'title', 'author', 'date',
	'zenon_id' and 'link', one per entry of the response's 'data' list; the
	list is empty when key is None or the response has no 'data' member.

	A record lacking 'id', 'title', 'author' or 'imprint' raises KeyError.
	"""
	if key is None:
		return []

	zenon_params = {'q':'f999_1:%s'%key,'limit': 1000}
	zenon_full_url = "%s?%s"%(zenon_url,urllib.urlencode(zenon_params))

	# Close the HTTP response explicitly, even when the body is not valid JSON.
	response = urllib2.urlopen(zenon_full_url)
	try:
		js_publications = json.load(response)
	finally:
		response.close()

	result = []
	if 'data' in js_publications:
		for pub in js_publications['data']:
			record = {
				'id': pub["id"],
				'title': pub["title"],
				'author': pub["author"],
				# only full stops are stripped; a more sophisticated normalization is still needed
				'date': pub['imprint'].replace('.',''),
				'zenon_id': pub['id'],
			}
			# '#' is written as the character reference '&#35;' on purpose; this
			# link ends up inside an HTML href built by the caller.
			record['link'] = "http://testopac.dainst.org/&#35;book/%s"%record['zenon_id']
			result.append(record)
	return result

def get_places(url = "http://gazetteer.dainst.org/search.json"):
	"""
	Retrieve geo-data for Limes locations from DAI's gazetteer via its RESTful API.

	url -- search endpoint; it is queried with `q=tags:limes` and `limit=1000`.

	Returns a list with one dict per place that has a 'prefLocation', holding:
	  'coord'            -- the place's prefLocation coordinates
	  'name'             -- the title of the preferred name
	  'id'               -- the place's '@id'
	  'subject_headings' -- values of the identifiers whose context is
	                        'zenon-thesaurus'
	  'gazlink'          -- the id with '#' replaced by '&#35;'
	Places without a 'prefLocation' are skipped and reported on stderr.
	"""
	result = []
	# query the gazetteer for the places related to the Limes
	params = {'limit': 1000, 'q': 'tags:limes'} # limit param is very important here to get the full results
	full_url = "%s?%s"%(url,urllib.urlencode(params))

	# Close the HTTP response explicitly, even when the body is not valid JSON.
	response = urllib2.urlopen(full_url)
	try:
		js_places = json.load(response)
	finally:
		response.close()

	for pl in js_places['result']:
		if 'prefLocation' not in pl:
			# Report the place being skipped: the previous code read the id from
			# the record built in an earlier iteration (or from an unbound name).
			print >> sys.stderr, "Skipped %s as it has no coordinates"%pl.get('@id')
			continue
		place = {}
		place['coord'] = pl['prefLocation']['coordinates']
		place['name'] = pl['prefName']['title'] # there could be multiple names here in diff languages
		place['id'] = pl['@id']
		# A place without an 'identifiers' list simply has no subject headings.
		place['subject_headings'] = [
			ident['value']
			for ident in pl.get('identifiers', [])
			if ident['context']==u'zenon-thesaurus'
		]
		place['gazlink'] = place["id"].replace("#","&#35;")
		result.append(place)
	return result

def main(ofname="output.kml"):
	"""
	Build a KML file of Limes-related publications grouped by place.

	Places come from get_places(); for each of a place's subject headings the
	related publications are fetched with get_related_publications(). Every
	publication whose date is at most four characters long is written as one
	placemark (via to_KML) at the place's coordinates.

	ofname -- path of the KML file to write (UTF-8 encoded).
	"""
	places = get_places()

	# The stray "</ns:metadata>" closing tag (it had no matching opening tag)
	# has been dropped so that the generated document is well-formed XML.
	metadata = """
	<ExtendedData xmlns:dc="http://purl.org/dc/elements/1.1/">
                 <dc:description>
				This dataset contains data about publications related to the Limes grouped by the place in the Limes they relate to.
				The main purpose is the display within the DARIAH Geo-browser, a tool to visualize data in time and space.
                 </dc:description>
                 <dc:subject>Archaeology, geo-date, Limes</dc:subject>
                 <dc:identifier>http://de.digitalclassicist.org/berlin/files/output.kml</dc:identifier>
                 <dc:language>de</dc:language>
                 <dc:date>2013-03-15</dc:date>
                 <dc:rights>Copyright 2000, O'Reilly Network</dc:rights>
                 <dc:publisher>German Archaeological Institute</dc:publisher>
            </ExtendedData>
	"""

	intro = """<?xml version="1.0" encoding="utf-8"?>
	<kml xmlns="http://www.opengis.net/kml/2.2">
	  <Document>%s
	"""%metadata
	outro = """
	  </Document>
	</kml>"""

	# The with-block closes the output file even if one of the fetches raises.
	with codecs.open(ofname,'w','utf-8') as out:
		out.write(intro)
		for place in places:
			for key in place["subject_headings"]:
				found = get_related_publications(key = key.encode('utf-8'))
				if not found:
					continue
				print >> sys.stderr,"Found %i related publications in Zenon related to %s"%(len(found),place['name'])
				for pub in found:
					# longer date strings are skipped; they have not been normalized to a plain year
					if len(pub['date'])<=4:
						desc = "<p>%s <em>%s</em> (%s) [<a target=\"_blank\" href='%s'>View in Zenon</a> ; <a href='%s' target=\"_blank\">View in Gazetteer</a> ] </p>"%(pub['author'],pub['title'],pub['date'],pub['link'],place['gazlink'])
						out.write(to_KML(place['name'],place['coord'],pub['date'],desc))
		out.write(outro)


# Script entry point. The indentation-collapsed second copy of the whole module
# that had been pasted below this guard was removed: it duplicated the
# definitions above and made the file unparseable.
if __name__ == "__main__":
	main()
	# To write the output to another destination, pass a file name instead,
	# e.g. main(ofname="thefilenameidecide.kml")