Skip to content

Instantly share code, notes, and snippets.

@myedibleenso
Last active January 18, 2019 20:38
Show Gist options
  • Save myedibleenso/87a3191c73938840b8ed768ec305db38 to your computer and use it in GitHub Desktop.
Save myedibleenso/87a3191c73938840b8ed768ec305db38 to your computer and use it in GitHub Desktop.
{
"sentences":[{
"words":["Gonzo","married","Camilla","."],
"startOffsets":[0,6,14,21],
"endOffsets":[5,13,21,22],
"tags":["NNP","VBD","NNP","."],
"lemmas":["Gonzo","marry","Camilla","."],
"entities":["O","O","PERSON","O"],
"norms":["O","O","O","O"],
"chunks":["B-NP","B-VP","B-NP","O"],
"graphs":{
"stanford-basic":{
"edges":[{
"source":1,
"destination":0,
"relation":"nsubj"
},{
"source":1,
"destination":2,
"relation":"dobj"
},{
"source":1,
"destination":3,
"relation":"punct"
}],
"roots":[1]
},
"stanford-collapsed":{
"edges":[{
"source":1,
"destination":0,
"relation":"nsubj"
},{
"source":1,
"destination":2,
"relation":"dobj"
},{
"source":1,
"destination":3,
"relation":"punct"
}],
"roots":[1]
}
}
}]
}
import org.clulab.processors.fastnlp.FastNLPProcessor
import org.clulab.processors.Document
import org.clulab.struct.Interval
import org.clulab.odin._
import org.clulab.odin.serialization.json.{JSONSerializer => OdinJSONSerializer, _}
import org.clulab.serialization.json._
import java.io.File
/** Example demonstrating how to serialize/deserialize
  * [[org.clulab.processors.Document]] and [[org.clulab.odin.Mention]] to/from json.
  *
  * Running the example:
  *  1. builds an Odin [[org.clulab.odin.ExtractorEngine]] from the inline grammar in `rules`,
  *  2. annotates a one-sentence text and extracts mentions from it,
  *  3. prints the mentions as json and writes them to `mentions.json`,
  *  4. reads `mentions.json` back and reports whether the round trip preserved the mention spans,
  *  5. writes the annotated document to `document.json`, reads it back and reports whether
  *     the round trip preserved the document's `equivalenceHash`.
  *
  * Both output files are written relative to the current working directory.
  */
object JSONExample extends App {

  // Odin grammar, written as YAML. The indentation inside this literal is significant:
  // every field of a rule has to line up with its `name` key, and the lines of a
  // `pattern: |` block scalar have to be indented more deeply than the `pattern` key.
  val rules =
    """
      |# NE rules
      |
      |- name: "ner-person"
      |  label: [Person, PossiblePerson, Entity]
      |  priority: 1
      |  type: token
      |  pattern: |
      |    ([entity="PERSON"]+ | "Gonzo")
      |
      |# Events
      |
      |# optional location and date
      |- name: "marry-syntax-1"
      |  label: [Marry]
      |  priority: 3
      |  example: "He married Jane last June in Hawaii."
      |  type: dependency
      |  pattern: |
      |    trigger = [lemma="marry"]
      |    spouse: Entity+ = <xcomp? /^nsubj/ | dobj
      |""".stripMargin

  val engine = ExtractorEngine(rules)
  val proc = new FastNLPProcessor
  val text = "Gonzo married Camilla."
  val doc = proc.annotate(text)
  val mentions = engine.extractFrom(doc)

  // print the json string for the extracted mentions
  println(mentions.json(pretty = true))

  // save the json to a file
  mentions.saveJSON("mentions.json", pretty = true)

  // deserialize the mentions json that was written in the last step
  val mentions2 = OdinJSONSerializer.toMentions(new File("mentions.json"))

  /** Reduces a mention to the triple (hash of its document, sentence index, token interval).
    *
    * The round-trip check compares these triples rather than the mentions themselves.
    * NOTE(review): presumably because the deserialized mentions refer to a freshly built
    * Document instance, so direct mention equality would not hold -- confirm.
    */
  def m2triple(m: Mention): (Int, Int, Interval) =
    (m.document.equivalenceHash, m.sentence, m.tokenInterval)

  // demonstrate span equivalence to the original mentions
  // (order-sensitive: both sequences must list the mentions in the same order)
  val mentionsRoundTrip: Boolean = mentions.map(m2triple) == mentions2.map(m2triple)
  println(s"mentions preserved by JSON round trip: $mentionsRoundTrip")

  // documents can also be serialized/deserialized
  doc.saveJSON("document.json", pretty = true)
  val d2 = JSONSerializer.toDocument(new File("document.json"))

  // demonstrate equivalence of annotations
  val documentRoundTrip: Boolean = doc.equivalenceHash == d2.equivalenceHash
  println(s"document preserved by JSON round trip: $documentRoundTrip")
}
{
"documents":{
"-1180172198":{
"sentences":[{
"words":["Gonzo","married","Camilla","."],
"startOffsets":[0,6,14,21],
"endOffsets":[5,13,21,22],
"tags":["NNP","VBD","NNP","."],
"lemmas":["Gonzo","marry","Camilla","."],
"entities":["O","O","PERSON","O"],
"norms":["O","O","O","O"],
"chunks":["B-NP","B-VP","B-NP","O"],
"graphs":{
"stanford-basic":{
"edges":[{
"source":1,
"destination":0,
"relation":"nsubj"
},{
"source":1,
"destination":2,
"relation":"dobj"
},{
"source":1,
"destination":3,
"relation":"punct"
}],
"roots":[1]
},
"stanford-collapsed":{
"edges":[{
"source":1,
"destination":0,
"relation":"nsubj"
},{
"source":1,
"destination":2,
"relation":"dobj"
},{
"source":1,
"destination":3,
"relation":"punct"
}],
"roots":[1]
}
}
}]
}
},
"mentions":[{
"type":"TextBoundMention",
"id":"T:648733472",
"text":"Camilla",
"labels":["Person","PossiblePerson","Entity"],
"tokenInterval":{
"start":2,
"end":3
},
"characterStartOffset":14,
"characterEndOffset":21,
"sentence":0,
"document":"-1180172198",
"keep":true,
"foundBy":"ner-person"
},{
"type":"EventMention",
"id":"E:1351231268",
"text":"Gonzo married Camilla",
"labels":["Marry"],
"trigger":{
"type":"TextBoundMention",
"id":"T:1627076846",
"text":"married",
"labels":["Marry"],
"tokenInterval":{
"start":1,
"end":2
},
"characterStartOffset":6,
"characterEndOffset":13,
"sentence":0,
"document":"-1180172198",
"keep":true,
"foundBy":"marry-syntax-1"
},
"arguments":{
"spouse":[{
"type":"TextBoundMention",
"id":"T:1618195043",
"text":"Gonzo",
"labels":["Person","PossiblePerson","Entity"],
"tokenInterval":{
"start":0,
"end":1
},
"characterStartOffset":0,
"characterEndOffset":5,
"sentence":0,
"document":"-1180172198",
"keep":true,
"foundBy":"ner-person"
},{
"type":"TextBoundMention",
"id":"T:648733472",
"text":"Camilla",
"labels":["Person","PossiblePerson","Entity"],
"tokenInterval":{
"start":2,
"end":3
},
"characterStartOffset":14,
"characterEndOffset":21,
"sentence":0,
"document":"-1180172198",
"keep":true,
"foundBy":"ner-person"
}]
},
"paths":{
"spouse":{
"T:1618195043":[{
"source":1,
"destination":0,
"relation":"nsubj"
}],
"T:648733472":[{
"source":1,
"destination":2,
"relation":"dobj"
}]
}
},
"tokenInterval":{
"start":0,
"end":3
},
"characterStartOffset":0,
"characterEndOffset":21,
"sentence":0,
"document":"-1180172198",
"keep":true,
"foundBy":"marry-syntax-1"
},{
"type":"TextBoundMention",
"id":"T:1618195043",
"text":"Gonzo",
"labels":["Person","PossiblePerson","Entity"],
"tokenInterval":{
"start":0,
"end":1
},
"characterStartOffset":0,
"characterEndOffset":5,
"sentence":0,
"document":"-1180172198",
"keep":true,
"foundBy":"ner-person"
}]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment