Skip to content

Instantly share code, notes, and snippets.

@christophermorse
Created November 24, 2015 03:01
Show Gist options
  • Save christophermorse/a100ec8878172cdf0717 to your computer and use it in GitHub Desktop.
Save christophermorse/a100ec8878172cdf0717 to your computer and use it in GitHub Desktop.
Parse Keys for Latin Perseus Corpora
#!/usr/bin/python
import os, os.path
import json
# Directory tree that is scanned for ``*.json`` files, and the file the
# collected records are written to.
SOURCE_DIR = "latin_text_perseus/"
OUTPUT_FILE = "latin_key.json"


def extract_encoding(data):
    """Return the reference declaration of a parsed TEI document.

    Looks up ``data['TEI.2']['teiHeader']['encodingDesc']`` and returns its
    ``'refsDecl'`` entry.  When ``encodingDesc`` has no such entry (or is not
    a mapping at all), the whole ``encodingDesc`` value is returned instead.

    Raises:
        KeyError: if the ``TEI.2`` / ``teiHeader`` / ``encodingDesc`` path
            itself is missing from ``data``.
    """
    encoding_desc = data['TEI.2']['teiHeader']['encodingDesc']
    try:
        return encoding_desc['refsDecl']
    except (KeyError, TypeError):
        # KeyError: mapping without 'refsDecl'.
        # TypeError: encodingDesc is a list, string or None.
        return encoding_desc


def author_from_path(dirpath, root):
    """Return the first directory component of ``dirpath`` below ``root``.

    Files that sit directly in ``root`` yield an empty string.
    """
    relative = os.path.relpath(dirpath, root)
    if relative == os.curdir:
        return ""
    # relpath() normalises to os.sep, so this also works where the
    # separator is not '/'.
    return relative.split(os.sep)[0]


def collect_keys(root=SOURCE_DIR):
    """Walk ``root`` and build one record per ``.json`` file found.

    Each record is a dict with the keys ``"name"`` (first directory level
    below ``root``), ``"title"`` (the file name) and ``"encoding"`` (see
    ``extract_encoding``).  Records are returned in ``os.walk`` order.
    """
    records = []
    for dirpath, _dirnames, filenames in os.walk(root):
        name = author_from_path(dirpath, root)
        for filename in filenames:
            if not filename.endswith(".json"):
                continue
            # Binary mode lets the json module detect the encoding itself
            # instead of relying on the platform's default text encoding.
            with open(os.path.join(dirpath, filename), "rb") as reading:
                data = json.load(reading)
            records.append({
                "name": name,
                "title": filename,
                "encoding": extract_encoding(data),
            })
    return records


def main():
    """Collect the records under SOURCE_DIR and write them to OUTPUT_FILE."""
    info = collect_keys(SOURCE_DIR)
    with open(OUTPUT_FILE, 'w') as outfile:
        json.dump(info, outfile, indent=4, sort_keys=True)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment