Skip to content

Instantly share code, notes, and snippets.

@seanherron
Created February 25, 2014 12:39
Show Gist options
  • Save seanherron/9208059 to your computer and use it in GitHub Desktop.
Save seanherron/9208059 to your computer and use it in GitHub Desktop.
RIDB ES Indexer
from lxml import etree
from pyelasticsearch import ElasticSearch
# Module-level Elasticsearch client pointed at localhost:9200; RecArea()
# sends its documents through this object.
es = ElasticSearch('http://localhost:9200/')
def RecArea(filename):
    """Index every ``RecArea`` element of an XML file into Elasticsearch.

    The file is parsed with lxml. For each ``arc:RecArea`` element a flat
    dict is built from its direct children (tag name with the ``arc``
    namespace stripped -> element text). Every ``arc:RecAreaAddress`` whose
    ``arc:RecAreaID`` child carries the same id is merged into that dict,
    in document order, with later values overriding earlier ones. The dict
    is printed and then indexed into the ``ridb`` index under the
    ``RecArea`` document type, using the RecAreaID as the document id.

    Args:
        filename: Path (or file-like object) accepted by ``etree.parse``.

    Notes:
        An area without a usable ``RecAreaID`` is indexed with ``id=None``
        and gets no address data, so it can never inherit the id of the
        previously processed area.
    """
    namespace = "http://www.recreation.gov/architecture/"
    tag_prefix = "{%s}" % namespace
    # Pass the prefix mapping explicitly to each XPath call rather than
    # registering "arc" globally through etree.FunctionNamespace.
    nsmap = {"arc": namespace}

    def fields_of(element):
        # Direct children only; a repeated tag keeps its last value.
        return {
            child.tag.replace(tag_prefix, ""): child.text
            for child in element
        }

    doc = etree.parse(filename)

    # Build the id -> addresses lookup in a single pass so that the whole
    # document is not re-scanned with a fresh XPath query for every area.
    # It also avoids splicing the id into an XPath string, which breaks on
    # ids containing quote characters.
    addresses_by_id = {}
    for address in doc.xpath("//arc:RecAreaAddress", namespaces=nsmap):
        ids = {node.text for node in address.findall(tag_prefix + "RecAreaID")}
        for address_id in ids:
            addresses_by_id.setdefault(address_id, []).append(address)

    for area in doc.xpath("//arc:RecArea", namespaces=nsmap):
        values = fields_of(area)
        # Read the id before merging addresses, and afresh for every area,
        # so a missing RecAreaID cannot reuse the previous area's id.
        recareaid = values.get("RecAreaID")
        if recareaid is not None:
            for address in addresses_by_id.get(recareaid, ()):
                values.update(fields_of(address))
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(values)
        es.index("ridb", "RecArea", values, id=recareaid)
# Script entry: runs at import/execution time and indexes the contents of
# formatted.xml (path is resolved relative to the working directory).
RecArea('formatted.xml')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment