Skip to content

Instantly share code, notes, and snippets.

@jczaplew
Last active August 29, 2015 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jczaplew/cc47481a02a51708c1bb to your computer and use it in GitHub Desktop.
Save jczaplew/cc47481a02a51708c1bb to your computer and use it in GitHub Desktop.
nlp2json.py
from bs4 import BeautifulSoup
import json
# Convert an XML dump of annotated sentences into a JSON document on stdout.
#
# NOTE(review): the element names read below (token, parse, dependencies,
# dep, governor, dependent, entity, span) look like Stanford CoreNLP XML
# output -- confirm against whatever produces dump/sentences.txt.xml.

INPUT_PATH = "dump/sentences.txt.xml"


def _token_to_dict(token):
    """Return the id attribute and child-element texts of one <token>."""
    return {
        "id": token["id"],
        "word": token.word.text,
        "lemma": token.lemma.text,
        "characterOffsetBegin": token.CharacterOffsetBegin.text,
        "characterOffsetEnd": token.CharacterOffsetEnd.text,
        "pos": token.POS.text,
        "ner": token.NER.text,
    }


def _dep_to_dict(dep):
    """Return the type plus governor/dependent index and text of one <dep>."""
    return {
        "type": dep["type"],
        "governor": {
            "idx": dep.governor["idx"],
            "value": dep.governor.text,
        },
        "dependent": {
            "idx": dep.dependent["idx"],
            "value": dep.dependent.text,
        },
    }


def _dependencies_to_dict(dependencies):
    """Return the type of one <dependencies> group and all of its <dep> edges."""
    return {
        "type": dependencies["type"],
        "dependencies": [
            _dep_to_dict(dep) for dep in dependencies.find_all("dep")
        ],
    }


def _entity_to_dict(entity):
    """Return the id, stripped text and span bounds of one <entity>."""
    return {
        "id": entity["id"],
        "val": entity.text.strip(),
        "start": entity.span["start"],
        "end": entity.span["end"],
    }


def _sentence_to_dict(sentence):
    """Return the JSON-ready dict for one <sentence> element.

    Raises IndexError if the sentence has no <parse> element, and
    AttributeError/KeyError if an expected child element or attribute
    is missing.
    """
    return {
        "id": sentence["id"],
        "tokens": [
            _token_to_dict(token) for token in sentence.find_all("token")
        ],
        # Only the first <parse> element of the sentence is kept.
        "parse": sentence.find_all("parse")[0].text,
        "dependencies": [
            _dependencies_to_dict(group)
            for group in sentence.find_all("dependencies")
        ],
        "machineReading": {
            "entities": [
                _entity_to_dict(entity)
                for entity in sentence.find_all("entity")
            ],
        },
    }


# Read raw bytes so the XML parser, not the platform's default text
# encoding, decides how to decode the document.
with open(INPUT_PATH, "rb") as xml_file:
    xml = xml_file.read()

soup = BeautifulSoup(xml, "xml")

output = {
    "sentences": [
        _sentence_to_dict(sentence)
        for sentence in soup.root.document.sentences.find_all("sentence")
    ],
}

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(json.dumps(output, indent=2))
@jczaplew
Copy link
Author

jczaplew commented Jul 7, 2015

Example output:

{
  "sentences": [
    {
      "tokens": [
        {
          "word": "Glenwood", 
          "characterOffsetEnd": "8", 
          "pos": "NNP", 
          "lemma": "Glenwood", 
          "characterOffsetBegin": "0", 
          "ner": "STRAT_NAME", 
          "id": "1"
        }, 
        {
          "word": "Formation", 
          "characterOffsetEnd": "18", 
          "pos": "NN", 
          "lemma": "formation", 
          "characterOffsetBegin": "9", 
          "ner": "STRAT_NAME", 
          "id": "2"
        }, 
        {
          "word": ".", 
          "characterOffsetEnd": "19", 
          "pos": ".", 
          "lemma": ".", 
          "characterOffsetBegin": "18", 
          "ner": "O", 
          "id": "3"
        }
      ], 
      "parse": "(ROOT (NP (NP (NNP Glenwood)) (NP (NN Formation)) (. .))) ", 
      "dependencies": [
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Formation", 
                "idx": "2"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": "Glenwood", 
                "idx": "1"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "2"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "3"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "2"
              }
            }
          ], 
          "type": "basic-dependencies"
        }, 
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Formation", 
                "idx": "2"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": "Glenwood", 
                "idx": "1"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "2"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "3"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "2"
              }
            }
          ], 
          "type": "collapsed-dependencies"
        }, 
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Formation", 
                "idx": "2"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": "Glenwood", 
                "idx": "1"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "2"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "3"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "2"
              }
            }
          ], 
          "type": "collapsed-ccprocessed-dependencies"
        }
      ], 
      "id": "1", 
      "machineReading": {
        "entities": [
          {
            "start": "1", 
            "end": "2", 
            "id": "EntityMention-1", 
            "val": "O"
          }
        ]
      }
    }, 
    {
      "tokens": [
        {
          "word": "Sandstone", 
          "characterOffsetEnd": "29", 
          "pos": "NNP", 
          "lemma": "Sandstone", 
          "characterOffsetBegin": "20", 
          "ner": "O", 
          "id": "1"
        }, 
        {
          "word": ",", 
          "characterOffsetEnd": "30", 
          "pos": ",", 
          "lemma": ",", 
          "characterOffsetBegin": "29", 
          "ner": "O", 
          "id": "2"
        }, 
        {
          "word": "siltstones", 
          "characterOffsetEnd": "41", 
          "pos": "NNS", 
          "lemma": "siltstone", 
          "characterOffsetBegin": "31", 
          "ner": "LITH", 
          "id": "3"
        }, 
        {
          "word": ",", 
          "characterOffsetEnd": "42", 
          "pos": ",", 
          "lemma": ",", 
          "characterOffsetBegin": "41", 
          "ner": "O", 
          "id": "4"
        }, 
        {
          "word": "and/or", 
          "characterOffsetEnd": "49", 
          "pos": "CC", 
          "lemma": "and/or", 
          "characterOffsetBegin": "43", 
          "ner": "O", 
          "id": "5"
        }, 
        {
          "word": "shale", 
          "characterOffsetEnd": "55", 
          "pos": "NN", 
          "lemma": "shale", 
          "characterOffsetBegin": "50", 
          "ner": "LITH", 
          "id": "6"
        }, 
        {
          "word": ".", 
          "characterOffsetEnd": "56", 
          "pos": ".", 
          "lemma": ".", 
          "characterOffsetBegin": "55", 
          "ner": "O", 
          "id": "7"
        }
      ], 
      "parse": "(ROOT (NP (NP (NNP Sandstone)) (, ,) (NP (NP (NNS siltstones)) (, ,) (NP (CC and/or) (NN shale))) (. .))) ", 
      "dependencies": [
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Sandstone", 
                "idx": "1"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "2"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "siltstones", 
                "idx": "3"
              }, 
              "type": "conj", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "4"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "and/or", 
                "idx": "5"
              }, 
              "type": "cc", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "shale", 
                "idx": "6"
              }, 
              "type": "conj", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "7"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }
          ], 
          "type": "basic-dependencies"
        }, 
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Sandstone", 
                "idx": "1"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "2"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "siltstones", 
                "idx": "3"
              }, 
              "type": "conj:and/or", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "4"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "and/or", 
                "idx": "5"
              }, 
              "type": "cc", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "shale", 
                "idx": "6"
              }, 
              "type": "conj:and/or", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "7"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }
          ], 
          "type": "collapsed-dependencies"
        }, 
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Sandstone", 
                "idx": "1"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "2"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "siltstones", 
                "idx": "3"
              }, 
              "type": "conj:and/or", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "4"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "and/or", 
                "idx": "5"
              }, 
              "type": "cc", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": "shale", 
                "idx": "6"
              }, 
              "type": "conj:and/or", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "7"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Sandstone", 
                "idx": "1"
              }
            }
          ], 
          "type": "collapsed-ccprocessed-dependencies"
        }
      ], 
      "id": "2", 
      "machineReading": {
        "entities": [
          {
            "start": "2", 
            "end": "3", 
            "id": "EntityMention-2", 
            "val": "O"
          }, 
          {
            "start": "5", 
            "end": "6", 
            "id": "EntityMention-3", 
            "val": "O"
          }
        ]
      }
    }, 
    {
      "tokens": [
        {
          "word": "St.", 
          "characterOffsetEnd": "61", 
          "pos": "NNP", 
          "lemma": "St.", 
          "characterOffsetBegin": "58", 
          "ner": "STRAT_NAME", 
          "id": "1"
        }, 
        {
          "word": "Peter", 
          "characterOffsetEnd": "67", 
          "pos": "NNP", 
          "lemma": "Peter", 
          "characterOffsetBegin": "62", 
          "ner": "STRAT_NAME", 
          "id": "2"
        }, 
        {
          "word": "Formation", 
          "characterOffsetEnd": "77", 
          "pos": "NN", 
          "lemma": "formation", 
          "characterOffsetBegin": "68", 
          "ner": "STRAT_NAME", 
          "id": "3"
        }, 
        {
          "word": ":", 
          "characterOffsetEnd": "78", 
          "pos": ":", 
          "lemma": ":", 
          "characterOffsetBegin": "77", 
          "ner": "O", 
          "id": "4"
        }, 
        {
          "word": "upper", 
          "characterOffsetEnd": "84", 
          "pos": "JJ", 
          "lemma": "upper", 
          "characterOffsetBegin": "79", 
          "ner": "O", 
          "id": "5"
        }, 
        {
          "word": "Tonti", 
          "characterOffsetEnd": "90", 
          "pos": "NNP", 
          "lemma": "Tonti", 
          "characterOffsetBegin": "85", 
          "ner": "STRAT_NAME", 
          "id": "6"
        }, 
        {
          "word": "Member", 
          "characterOffsetEnd": "97", 
          "pos": "NNP", 
          "lemma": "Member", 
          "characterOffsetBegin": "91", 
          "ner": "STRAT_NAME", 
          "id": "7"
        }, 
        {
          "word": "is", 
          "characterOffsetEnd": "100", 
          "pos": "VBZ", 
          "lemma": "be", 
          "characterOffsetBegin": "98", 
          "ner": "O", 
          "id": "8"
        }, 
        {
          "word": "a", 
          "characterOffsetEnd": "102", 
          "pos": "DT", 
          "lemma": "a", 
          "characterOffsetBegin": "101", 
          "ner": "O", 
          "id": "9"
        }, 
        {
          "word": "medium-grained", 
          "characterOffsetEnd": "117", 
          "pos": "JJ", 
          "lemma": "medium-grained", 
          "characterOffsetBegin": "103", 
          "ner": "O", 
          "id": "10"
        }, 
        {
          "word": ",", 
          "characterOffsetEnd": "118", 
          "pos": ",", 
          "lemma": ",", 
          "characterOffsetBegin": "117", 
          "ner": "O", 
          "id": "11"
        }, 
        {
          "word": "mature", 
          "characterOffsetEnd": "125", 
          "pos": "JJ", 
          "lemma": "mature", 
          "characterOffsetBegin": "119", 
          "ner": "O", 
          "id": "12"
        }, 
        {
          "word": "quartz", 
          "characterOffsetEnd": "132", 
          "pos": "NN", 
          "lemma": "quartz", 
          "characterOffsetBegin": "126", 
          "ner": "LITH_ATT", 
          "id": "13"
        }, 
        {
          "word": "sandstone", 
          "characterOffsetEnd": "142", 
          "pos": "NN", 
          "lemma": "sandstone", 
          "characterOffsetBegin": "133", 
          "ner": "LITH", 
          "id": "14"
        }, 
        {
          "word": ";", 
          "characterOffsetEnd": "143", 
          "pos": ":", 
          "lemma": ";", 
          "characterOffsetBegin": "142", 
          "ner": "O", 
          "id": "15"
        }, 
        {
          "word": "basal", 
          "characterOffsetEnd": "150", 
          "pos": "JJ", 
          "lemma": "basal", 
          "characterOffsetBegin": "145", 
          "ner": "O", 
          "id": "16"
        }, 
        {
          "word": "Readstown", 
          "characterOffsetEnd": "160", 
          "pos": "NNP", 
          "lemma": "Readstown", 
          "characterOffsetBegin": "151", 
          "ner": "STRAT_NAME", 
          "id": "17"
        }, 
        {
          "word": "Member", 
          "characterOffsetEnd": "167", 
          "pos": "NNP", 
          "lemma": "Member", 
          "characterOffsetBegin": "161", 
          "ner": "STRAT_NAME", 
          "id": "18"
        }, 
        {
          "word": "is", 
          "characterOffsetEnd": "170", 
          "pos": "VBZ", 
          "lemma": "be", 
          "characterOffsetBegin": "168", 
          "ner": "O", 
          "id": "19"
        }, 
        {
          "word": "immature", 
          "characterOffsetEnd": "179", 
          "pos": "JJ", 
          "lemma": "immature", 
          "characterOffsetBegin": "171", 
          "ner": "O", 
          "id": "20"
        }, 
        {
          "word": ",", 
          "characterOffsetEnd": "180", 
          "pos": ",", 
          "lemma": ",", 
          "characterOffsetBegin": "179", 
          "ner": "O", 
          "id": "21"
        }, 
        {
          "word": "consisting", 
          "characterOffsetEnd": "191", 
          "pos": "VBG", 
          "lemma": "consist", 
          "characterOffsetBegin": "181", 
          "ner": "O", 
          "id": "22"
        }, 
        {
          "word": "of", 
          "characterOffsetEnd": "194", 
          "pos": "IN", 
          "lemma": "of", 
          "characterOffsetBegin": "192", 
          "ner": "O", 
          "id": "23"
        }, 
        {
          "word": "red-brown", 
          "characterOffsetEnd": "204", 
          "pos": "JJ", 
          "lemma": "red-brown", 
          "characterOffsetBegin": "195", 
          "ner": "O", 
          "id": "24"
        }, 
        {
          "word": "shale", 
          "characterOffsetEnd": "210", 
          "pos": "NN", 
          "lemma": "shale", 
          "characterOffsetBegin": "205", 
          "ner": "LITH", 
          "id": "25"
        }, 
        {
          "word": ",", 
          "characterOffsetEnd": "211", 
          "pos": ",", 
          "lemma": ",", 
          "characterOffsetBegin": "210", 
          "ner": "O", 
          "id": "26"
        }, 
        {
          "word": "sandstone", 
          "characterOffsetEnd": "221", 
          "pos": "NN", 
          "lemma": "sandstone", 
          "characterOffsetBegin": "212", 
          "ner": "LITH", 
          "id": "27"
        }, 
        {
          "word": ",", 
          "characterOffsetEnd": "222", 
          "pos": ",", 
          "lemma": ",", 
          "characterOffsetBegin": "221", 
          "ner": "O", 
          "id": "28"
        }, 
        {
          "word": "and", 
          "characterOffsetEnd": "226", 
          "pos": "CC", 
          "lemma": "and", 
          "characterOffsetBegin": "223", 
          "ner": "O", 
          "id": "29"
        }, 
        {
          "word": "chert", 
          "characterOffsetEnd": "232", 
          "pos": "NN", 
          "lemma": "chert", 
          "characterOffsetBegin": "227", 
          "ner": "LITH", 
          "id": "30"
        }, 
        {
          "word": "conglomerate", 
          "characterOffsetEnd": "245", 
          "pos": "NN", 
          "lemma": "conglomerate", 
          "characterOffsetBegin": "233", 
          "ner": "LITH", 
          "id": "31"
        }, 
        {
          "word": ".", 
          "characterOffsetEnd": "246", 
          "pos": ".", 
          "lemma": ".", 
          "characterOffsetBegin": "245", 
          "ner": "O", 
          "id": "32"
        }
      ], 
      "parse": "(ROOT (NP (NP (NP (NNP St.) (NNP Peter)) (NP (NN Formation))) (: :) (S (S (NP (JJ upper) (NNP Tonti) (NNP Member)) (VP (VBZ is) (NP (DT a) (JJ medium-grained) (, ,) (JJ mature) (NN quartz) (NN sandstone)))) (: ;) (S (NP (JJ basal) (NNP Readstown) (NNP Member)) (VP (VBZ is) (ADJP (JJ immature))))) (, ,) (VP (VBG consisting)) (PP (IN of) (NP (JJ red-brown) (NN shale) (, ,) (NN sandstone) (, ,) (CC and) (NN chert) (NN conglomerate))) (. .))) ", 
      "dependencies": [
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Formation", 
                "idx": "3"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": "St.", 
                "idx": "1"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": "Peter", 
                "idx": "2"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": ":", 
                "idx": "4"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": "upper", 
                "idx": "5"
              }, 
              "type": "amod", 
              "governor": {
                "value": "Member", 
                "idx": "7"
              }
            }, 
            {
              "dependent": {
                "value": "Tonti", 
                "idx": "6"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Member", 
                "idx": "7"
              }
            }, 
            {
              "dependent": {
                "value": "Member", 
                "idx": "7"
              }, 
              "type": "nsubj", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "is", 
                "idx": "8"
              }, 
              "type": "cop", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "a", 
                "idx": "9"
              }, 
              "type": "det", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "medium-grained", 
                "idx": "10"
              }, 
              "type": "amod", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "11"
              }, 
              "type": "punct", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "mature", 
                "idx": "12"
              }, 
              "type": "amod", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "quartz", 
                "idx": "13"
              }, 
              "type": "nn", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "sandstone", 
                "idx": "14"
              }, 
              "type": "dep", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": ";", 
                "idx": "15"
              }, 
              "type": "punct", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "basal", 
                "idx": "16"
              }, 
              "type": "amod", 
              "governor": {
                "value": "Member", 
                "idx": "18"
              }
            }, 
            {
              "dependent": {
                "value": "Readstown", 
                "idx": "17"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Member", 
                "idx": "18"
              }
            }, 
            {
              "dependent": {
                "value": "Member", 
                "idx": "18"
              }, 
              "type": "nsubj", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "is", 
                "idx": "19"
              }, 
              "type": "cop", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "immature", 
                "idx": "20"
              }, 
              "type": "parataxis", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "21"
              }, 
              "type": "punct", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "consisting", 
                "idx": "22"
              }, 
              "type": "xcomp", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "of", 
                "idx": "23"
              }, 
              "type": "prep", 
              "governor": {
                "value": "consisting", 
                "idx": "22"
              }
            }, 
            {
              "dependent": {
                "value": "red-brown", 
                "idx": "24"
              }, 
              "type": "amod", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "shale", 
                "idx": "25"
              }, 
              "type": "pobj", 
              "governor": {
                "value": "of", 
                "idx": "23"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "26"
              }, 
              "type": "punct", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "sandstone", 
                "idx": "27"
              }, 
              "type": "conj", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "28"
              }, 
              "type": "punct", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "and", 
                "idx": "29"
              }, 
              "type": "cc", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "chert", 
                "idx": "30"
              }, 
              "type": "nn", 
              "governor": {
                "value": "conglomerate", 
                "idx": "31"
              }
            }, 
            {
              "dependent": {
                "value": "conglomerate", 
                "idx": "31"
              }, 
              "type": "conj", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "32"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }
          ], 
          "type": "basic-dependencies"
        }, 
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Formation", 
                "idx": "3"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": "St.", 
                "idx": "1"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": "Peter", 
                "idx": "2"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": ":", 
                "idx": "4"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": "upper", 
                "idx": "5"
              }, 
              "type": "amod", 
              "governor": {
                "value": "Member", 
                "idx": "7"
              }
            }, 
            {
              "dependent": {
                "value": "Tonti", 
                "idx": "6"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Member", 
                "idx": "7"
              }
            }, 
            {
              "dependent": {
                "value": "Member", 
                "idx": "7"
              }, 
              "type": "nsubj", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "is", 
                "idx": "8"
              }, 
              "type": "cop", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "a", 
                "idx": "9"
              }, 
              "type": "det", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "medium-grained", 
                "idx": "10"
              }, 
              "type": "amod", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "11"
              }, 
              "type": "punct", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "mature", 
                "idx": "12"
              }, 
              "type": "amod", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "quartz", 
                "idx": "13"
              }, 
              "type": "nn", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "sandstone", 
                "idx": "14"
              }, 
              "type": "dep", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": ";", 
                "idx": "15"
              }, 
              "type": "punct", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "basal", 
                "idx": "16"
              }, 
              "type": "amod", 
              "governor": {
                "value": "Member", 
                "idx": "18"
              }
            }, 
            {
              "dependent": {
                "value": "Readstown", 
                "idx": "17"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Member", 
                "idx": "18"
              }
            }, 
            {
              "dependent": {
                "value": "Member", 
                "idx": "18"
              }, 
              "type": "nsubj", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "is", 
                "idx": "19"
              }, 
              "type": "cop", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "immature", 
                "idx": "20"
              }, 
              "type": "parataxis", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "21"
              }, 
              "type": "punct", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "consisting", 
                "idx": "22"
              }, 
              "type": "xcomp", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "of", 
                "idx": "23"
              }, 
              "type": "prep", 
              "governor": {
                "value": "consisting", 
                "idx": "22"
              }
            }, 
            {
              "dependent": {
                "value": "red-brown", 
                "idx": "24"
              }, 
              "type": "amod", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "shale", 
                "idx": "25"
              }, 
              "type": "pobj", 
              "governor": {
                "value": "of", 
                "idx": "23"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "26"
              }, 
              "type": "punct", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "sandstone", 
                "idx": "27"
              }, 
              "type": "conj:and", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "28"
              }, 
              "type": "punct", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "and", 
                "idx": "29"
              }, 
              "type": "cc", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "chert", 
                "idx": "30"
              }, 
              "type": "nn", 
              "governor": {
                "value": "conglomerate", 
                "idx": "31"
              }
            }, 
            {
              "dependent": {
                "value": "conglomerate", 
                "idx": "31"
              }, 
              "type": "conj:and", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "32"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }
          ], 
          "type": "collapsed-dependencies"
        }, 
        {
          "dependencies": [
            {
              "dependent": {
                "value": "Formation", 
                "idx": "3"
              }, 
              "type": "root", 
              "governor": {
                "value": "ROOT", 
                "idx": "0"
              }
            }, 
            {
              "dependent": {
                "value": "St.", 
                "idx": "1"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": "Peter", 
                "idx": "2"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": ":", 
                "idx": "4"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": "upper", 
                "idx": "5"
              }, 
              "type": "amod", 
              "governor": {
                "value": "Member", 
                "idx": "7"
              }
            }, 
            {
              "dependent": {
                "value": "Tonti", 
                "idx": "6"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Member", 
                "idx": "7"
              }
            }, 
            {
              "dependent": {
                "value": "Member", 
                "idx": "7"
              }, 
              "type": "nsubj", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "is", 
                "idx": "8"
              }, 
              "type": "cop", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "a", 
                "idx": "9"
              }, 
              "type": "det", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "medium-grained", 
                "idx": "10"
              }, 
              "type": "amod", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "11"
              }, 
              "type": "punct", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "mature", 
                "idx": "12"
              }, 
              "type": "amod", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "quartz", 
                "idx": "13"
              }, 
              "type": "nn", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "sandstone", 
                "idx": "14"
              }, 
              "type": "dep", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }, 
            {
              "dependent": {
                "value": ";", 
                "idx": "15"
              }, 
              "type": "punct", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": "basal", 
                "idx": "16"
              }, 
              "type": "amod", 
              "governor": {
                "value": "Member", 
                "idx": "18"
              }
            }, 
            {
              "dependent": {
                "value": "Readstown", 
                "idx": "17"
              }, 
              "type": "nn", 
              "governor": {
                "value": "Member", 
                "idx": "18"
              }
            }, 
            {
              "dependent": {
                "value": "Member", 
                "idx": "18"
              }, 
              "type": "nsubj", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "is", 
                "idx": "19"
              }, 
              "type": "cop", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "immature", 
                "idx": "20"
              }, 
              "type": "parataxis", 
              "governor": {
                "value": "sandstone", 
                "idx": "14"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "21"
              }, 
              "type": "punct", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "consisting", 
                "idx": "22"
              }, 
              "type": "xcomp", 
              "governor": {
                "value": "immature", 
                "idx": "20"
              }
            }, 
            {
              "dependent": {
                "value": "of", 
                "idx": "23"
              }, 
              "type": "prep", 
              "governor": {
                "value": "consisting", 
                "idx": "22"
              }
            }, 
            {
              "dependent": {
                "value": "red-brown", 
                "idx": "24"
              }, 
              "type": "amod", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "shale", 
                "idx": "25"
              }, 
              "type": "pobj", 
              "governor": {
                "value": "of", 
                "idx": "23"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "26"
              }, 
              "type": "punct", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "sandstone", 
                "idx": "27"
              }, 
              "type": "pobj", 
              "governor": {
                "value": "of", 
                "idx": "23"
              }
            }, 
            {
              "dependent": {
                "value": "sandstone", 
                "idx": "27"
              }, 
              "type": "conj:and", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": ",", 
                "idx": "28"
              }, 
              "type": "punct", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "and", 
                "idx": "29"
              }, 
              "type": "cc", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": "chert", 
                "idx": "30"
              }, 
              "type": "nn", 
              "governor": {
                "value": "conglomerate", 
                "idx": "31"
              }
            }, 
            {
              "dependent": {
                "value": "conglomerate", 
                "idx": "31"
              }, 
              "type": "pobj", 
              "governor": {
                "value": "of", 
                "idx": "23"
              }
            }, 
            {
              "dependent": {
                "value": "conglomerate", 
                "idx": "31"
              }, 
              "type": "conj:and", 
              "governor": {
                "value": "shale", 
                "idx": "25"
              }
            }, 
            {
              "dependent": {
                "value": ".", 
                "idx": "32"
              }, 
              "type": "punct", 
              "governor": {
                "value": "Formation", 
                "idx": "3"
              }
            }
          ], 
          "type": "collapsed-ccprocessed-dependencies"
        }
      ], 
      "id": "3", 
      "machineReading": {
        "entities": [
          {
            "start": "1", 
            "end": "2", 
            "id": "EntityMention-4", 
            "val": "O"
          }, 
          {
            "start": "6", 
            "end": "7", 
            "id": "EntityMention-5", 
            "val": "O"
          }, 
          {
            "start": "12", 
            "end": "13", 
            "id": "EntityMention-6", 
            "val": "O"
          }, 
          {
            "start": "13", 
            "end": "14", 
            "id": "EntityMention-7", 
            "val": "O"
          }, 
          {
            "start": "17", 
            "end": "18", 
            "id": "EntityMention-8", 
            "val": "O"
          }, 
          {
            "start": "24", 
            "end": "25", 
            "id": "EntityMention-9", 
            "val": "O"
          }, 
          {
            "start": "26", 
            "end": "27", 
            "id": "EntityMention-10", 
            "val": "O"
          }, 
          {
            "start": "30", 
            "end": "31", 
            "id": "EntityMention-11", 
            "val": "O"
          }
        ]
      }
    }
  ]
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment