Skip to content

Instantly share code, notes, and snippets.

@irv
Created September 19, 2018 14:33
Show Gist options
  • Save irv/45f1efea722cccad6c47eae79d96bf5b to your computer and use it in GitHub Desktop.
Save irv/45f1efea722cccad6c47eae79d96bf5b to your computer and use it in GitHub Desktop.
Convert Gentle word alignment to W3C annotations
import json
def main(path="word_aligned.json"):
    """Load an alignment JSON file and print its words as annotations.

    Args:
        path: Location of the JSON file to read. Defaults to
            ``word_aligned.json`` in the current working directory, which
            is the file the script has always read.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the decoded document has no top-level ``"words"`` key.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale encoding, which can mangle non-ASCII words on some systems.
    with open(path, "r", encoding="utf-8") as f:
        aligned = json.load(f)
    transform(aligned["words"])
def transform(words):
    """Print the given words as a JSON document of annotations.

    Each word is paired with its index in ``words`` and handed to
    ``transform_annotation``. The resulting annotations are collected under
    an ``"items"`` key and written to standard output as JSON indented by
    four spaces.

    Args:
        words: Iterable of word entries, each in the form expected by
            ``transform_annotation`` under its ``"word"`` key.

    Returns:
        None. The JSON document is printed, not returned.
    """
    enumerated = [
        {"word": word, "position": position}
        for position, word in enumerate(words)
    ]
    # transform_annotation yields None for entries without "alignedWord";
    # drop those so the output list never contains JSON ``null`` items.
    # Positions are assigned before filtering, so annotation ids still
    # reflect each word's index in the original input.
    items = [
        annotation
        for annotation in map(transform_annotation, enumerated)
        if annotation is not None
    ]
    print(json.dumps({"items": items}, indent=4))
def transform_annotation(word):
    """Build one annotation dict for a positioned word entry.

    Args:
        word: Mapping with a ``"position"`` value and a ``"word"`` mapping.
            The inner mapping must carry ``"alignedWord"``, ``"start"`` and
            ``"end"`` for an annotation to be produced.

    Returns:
        A dict with ``id``, ``type``, ``motivation``, ``body`` and
        ``target`` keys, or ``None`` when the inner mapping has no
        ``"alignedWord"`` key.
    """
    entry = word["word"]
    if "alignedWord" not in entry:
        return None

    annotation_id = "http://example.org/podcast/annotation/{0}".format(
        word["position"]
    )
    body = {
        "id": "http://example.org/podcast/annotation/{0}/word".format(
            word["position"]
        ),
        "type": "Text",
        "value": entry["alignedWord"],
    }
    # The target addresses a time range on the canvas via a "#t=start,end"
    # fragment built from the entry's start and end values.
    target = "http://example.org/podcast/canvas#t={0},{1}".format(
        entry["start"], entry["end"]
    )
    return {
        "id": annotation_id,
        "type": "Annotation",
        "motivation": "painting",
        "body": body,
        "target": target,
    }
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment