myedibleenso/JSONExample.scala

## document.json
{
  "sentences":[{
    "words":["Gonzo","married","Camilla","."],
    "startOffsets":[0,6,14,21],
    "endOffsets":[5,13,21,22],
    "tags":["NNP","VBD","NNP","."],
    "lemmas":["Gonzo","marry","Camilla","."],
    "entities":["O","O","PERSON","O"],
    "norms":["O","O","O","O"],
    "chunks":["B-NP","B-VP","B-NP","O"],
    "graphs":{
      "stanford-basic":{
        "edges":[{
          "source":1,
          "destination":0,
          "relation":"nsubj"
        },{
          "source":1,
          "destination":2,
          "relation":"dobj"
        },{
          "source":1,
          "destination":3,
          "relation":"punct"
        }],
        "roots":[1]
      },
      "stanford-collapsed":{
        "edges":[{
          "source":1,
          "destination":0,
          "relation":"nsubj"
        },{
          "source":1,
          "destination":2,
          "relation":"dobj"
        },{
          "source":1,
          "destination":3,
          "relation":"punct"
        }],
        "roots":[1]
      }
    }
  }]
}

## JSONExample.scala
import org.clulab.processors.fastnlp.FastNLPProcessor
import org.clulab.processors.Document
import org.clulab.struct.Interval
import org.clulab.odin._
import org.clulab.odin.serialization.json.{JSONSerializer => OdinJSONSerializer, _}
import org.clulab.serialization.json._
import java.io.File


/** Example demonstrating how to serialize/deserialize
 * [[org.clulab.processors.Document]] and [[org.clulab.odin.Mention]] to/from json.
 *
 * The example annotates one sentence, extracts mentions with a small rule set,
 * prints the mentions as json, writes `mentions.json` and `document.json`
 * (relative paths), reads both files back, and prints whether each round trip
 * produced values equivalent to the originals.
 */
object JSONExample extends App {
  val rules =
    """
      |# NE rules
      |
      |- name: "ner-person"
      |  label: [Person, PossiblePerson, Entity]
      |  priority: 1
      |  type: token
      |  pattern: |
      |   ([entity="PERSON"]+ | "Gonzo")
      |
      |# Events
      |
      |# optional location and date
      |- name: "marry-syntax-1"
      |  label: [Marry]
      |  priority: 3
      |  example: "He married Jane last June in Hawaii."
      |  type: dependency
      |  pattern: |
      |    trigger = [lemma="marry"]
      |    spouse: Entity+ = <xcomp? /^nsubj/ | dobj
    """.stripMargin
  val engine = ExtractorEngine(rules)

  val proc = new FastNLPProcessor
  val text = "Gonzo married Camilla."
  val doc = proc.annotate(text)
  val mentions = engine.extractFrom(doc)

  // print the json string for the extracted mentions
  println(mentions.json(pretty=true))

  // save the json to a file
  mentions.saveJSON("mentions.json", pretty=true)

  // deserialize the mentions json that was written in the last step
  val mentions2 = OdinJSONSerializer.toMentions(new File("mentions.json"))

  /** Reduces a mention to (document equivalenceHash, sentence, tokenInterval),
   * the triple used below to compare mentions by span.
   */
  def m2triple(m: Mention):(Int, Int, Interval) = (m.document.equivalenceHash, m.sentence, m.tokenInterval)

  // demonstrate span equivalence to the original mentions;
  // the comparison result is kept and printed rather than silently discarded
  val mentionsMatch: Boolean = mentions.map(m2triple) == mentions2.map(m2triple)
  println(s"mentions have equivalent spans after the json round trip: $mentionsMatch")

  // documents can also be serialized/deserialized
  doc.saveJSON("document.json", pretty=true)
  val d2 = JSONSerializer.toDocument(new File("document.json"))

  // demonstrate equivalence of annotations;
  // again the result is reported instead of being thrown away
  val documentsMatch: Boolean = doc.equivalenceHash == d2.equivalenceHash
  println(s"documents have the same equivalenceHash after the json round trip: $documentsMatch")
}

## mentions.json
{
  "documents":{
    "-1180172198":{
      "sentences":[{
        "words":["Gonzo","married","Camilla","."],
        "startOffsets":[0,6,14,21],
        "endOffsets":[5,13,21,22],
        "tags":["NNP","VBD","NNP","."],
        "lemmas":["Gonzo","marry","Camilla","."],
        "entities":["O","O","PERSON","O"],
        "norms":["O","O","O","O"],
        "chunks":["B-NP","B-VP","B-NP","O"],
        "graphs":{
          "stanford-basic":{
            "edges":[{
              "source":1,
              "destination":0,
              "relation":"nsubj"
            },{
              "source":1,
              "destination":2,
              "relation":"dobj"
            },{
              "source":1,
              "destination":3,
              "relation":"punct"
            }],
            "roots":[1]
          },
          "stanford-collapsed":{
            "edges":[{
              "source":1,
              "destination":0,
              "relation":"nsubj"
            },{
              "source":1,
              "destination":2,
              "relation":"dobj"
            },{
              "source":1,
              "destination":3,
              "relation":"punct"
            }],
            "roots":[1]
          }
        }
      }]
    }
  },
  "mentions":[{
    "type":"TextBoundMention",
    "id":"T:648733472",
    "text":"Camilla",
    "labels":["Person","PossiblePerson","Entity"],
    "tokenInterval":{
      "start":2,
      "end":3
    },
    "characterStartOffset":14,
    "characterEndOffset":21,
    "sentence":0,
    "document":"-1180172198",
    "keep":true,
    "foundBy":"ner-person"
  },{
    "type":"EventMention",
    "id":"E:1351231268",
    "text":"Gonzo married Camilla",
    "labels":["Marry"],
    "trigger":{
      "type":"TextBoundMention",
      "id":"T:1627076846",
      "text":"married",
      "labels":["Marry"],
      "tokenInterval":{
        "start":1,
        "end":2
      },
      "characterStartOffset":6,
      "characterEndOffset":13,
      "sentence":0,
      "document":"-1180172198",
      "keep":true,
      "foundBy":"marry-syntax-1"
    },
    "arguments":{
      "spouse":[{
        "type":"TextBoundMention",
        "id":"T:1618195043",
        "text":"Gonzo",
        "labels":["Person","PossiblePerson","Entity"],
        "tokenInterval":{
          "start":0,
          "end":1
        },
        "characterStartOffset":0,
        "characterEndOffset":5,
        "sentence":0,
        "document":"-1180172198",
        "keep":true,
        "foundBy":"ner-person"
      },{
        "type":"TextBoundMention",
        "id":"T:648733472",
        "text":"Camilla",
        "labels":["Person","PossiblePerson","Entity"],
        "tokenInterval":{
          "start":2,
          "end":3
        },
        "characterStartOffset":14,
        "characterEndOffset":21,
        "sentence":0,
        "document":"-1180172198",
        "keep":true,
        "foundBy":"ner-person"
      }]
    },
    "paths":{
      "spouse":{
        "T:1618195043":[{
          "source":1,
          "destination":0,
          "relation":"nsubj"
        }],
        "T:648733472":[{
          "source":1,
          "destination":2,
          "relation":"dobj"
        }]
      }
    },
    "tokenInterval":{
      "start":0,
      "end":3
    },
    "characterStartOffset":0,
    "characterEndOffset":21,
    "sentence":0,
    "document":"-1180172198",
    "keep":true,
    "foundBy":"marry-syntax-1"
  },{
    "type":"TextBoundMention",
    "id":"T:1618195043",
    "text":"Gonzo",
    "labels":["Person","PossiblePerson","Entity"],
    "tokenInterval":{
      "start":0,
      "end":1
    },
    "characterStartOffset":0,
    "characterEndOffset":5,
    "sentence":0,
    "document":"-1180172198",
    "keep":true,
    "foundBy":"ner-person"
  }]
}
	{
	"sentences":[{
	"words":["Gonzo","married","Camilla","."],
	"startOffsets":[0,6,14,21],
	"endOffsets":[5,13,21,22],
	"tags":["NNP","VBD","NNP","."],
	"lemmas":["Gonzo","marry","Camilla","."],
	"entities":["O","O","PERSON","O"],
	"norms":["O","O","O","O"],
	"chunks":["B-NP","B-VP","B-NP","O"],
	"graphs":{
	"stanford-basic":{
	"edges":[{
	"source":1,
	"destination":0,
	"relation":"nsubj"
	},{
	"source":1,
	"destination":2,
	"relation":"dobj"
	},{
	"source":1,
	"destination":3,
	"relation":"punct"
	}],
	"roots":[1]
	},
	"stanford-collapsed":{
	"edges":[{
	"source":1,
	"destination":0,
	"relation":"nsubj"
	},{
	"source":1,
	"destination":2,
	"relation":"dobj"
	},{
	"source":1,
	"destination":3,
	"relation":"punct"
	}],
	"roots":[1]
	}
	}
	}]
	}
	import org.clulab.processors.fastnlp.FastNLPProcessor
	import org.clulab.processors.Document
	import org.clulab.struct.Interval
	import org.clulab.odin._
	import org.clulab.odin.serialization.json.{JSONSerializer => OdinJSONSerializer, _}
	import org.clulab.serialization.json._
	import java.io.File


	/** Example demonstrating how to serialize/deserialize
	 * [[org.clulab.processors.Document]] and [[org.clulab.odin.Mention]] to/from json.
	 *
	 * The example annotates one sentence, extracts mentions with a small rule set,
	 * prints the mentions as json, writes `mentions.json` and `document.json`
	 * (relative paths), reads both files back, and prints whether each round trip
	 * produced values equivalent to the originals.
	 */
	object JSONExample extends App {
	  // The margin character must be a bare `|` for stripMargin to remove it, and the
	  // rule fields must stay aligned under `name` with each pattern body nested
	  // below `pattern: |`, otherwise the rules are not well-formed YAML.
	  val rules =
	    """
	      |# NE rules
	      |
	      |- name: "ner-person"
	      |  label: [Person, PossiblePerson, Entity]
	      |  priority: 1
	      |  type: token
	      |  pattern: |
	      |   ([entity="PERSON"]+ | "Gonzo")
	      |
	      |# Events
	      |
	      |# optional location and date
	      |- name: "marry-syntax-1"
	      |  label: [Marry]
	      |  priority: 3
	      |  example: "He married Jane last June in Hawaii."
	      |  type: dependency
	      |  pattern: |
	      |    trigger = [lemma="marry"]
	      |    spouse: Entity+ = <xcomp? /^nsubj/ | dobj
	    """.stripMargin
	  val engine = ExtractorEngine(rules)

	  val proc = new FastNLPProcessor
	  val text = "Gonzo married Camilla."
	  val doc = proc.annotate(text)
	  val mentions = engine.extractFrom(doc)

	  // print the json string for the extracted mentions
	  println(mentions.json(pretty=true))

	  // save the json to a file
	  mentions.saveJSON("mentions.json", pretty=true)

	  // deserialize the mentions json that was written in the last step
	  val mentions2 = OdinJSONSerializer.toMentions(new File("mentions.json"))

	  /** Reduces a mention to (document equivalenceHash, sentence, tokenInterval),
	   * the triple used below to compare mentions by span.
	   */
	  def m2triple(m: Mention):(Int, Int, Interval) = (m.document.equivalenceHash, m.sentence, m.tokenInterval)

	  // demonstrate span equivalence to the original mentions;
	  // the comparison result is kept and printed rather than silently discarded
	  val mentionsMatch: Boolean = mentions.map(m2triple) == mentions2.map(m2triple)
	  println(s"mentions have equivalent spans after the json round trip: $mentionsMatch")

	  // documents can also be serialized/deserialized
	  doc.saveJSON("document.json", pretty=true)
	  val d2 = JSONSerializer.toDocument(new File("document.json"))

	  // demonstrate equivalence of annotations;
	  // again the result is reported instead of being thrown away
	  val documentsMatch: Boolean = doc.equivalenceHash == d2.equivalenceHash
	  println(s"documents have the same equivalenceHash after the json round trip: $documentsMatch")
	}
	{
	"documents":{
	"-1180172198":{
	"sentences":[{
	"words":["Gonzo","married","Camilla","."],
	"startOffsets":[0,6,14,21],
	"endOffsets":[5,13,21,22],
	"tags":["NNP","VBD","NNP","."],
	"lemmas":["Gonzo","marry","Camilla","."],
	"entities":["O","O","PERSON","O"],
	"norms":["O","O","O","O"],
	"chunks":["B-NP","B-VP","B-NP","O"],
	"graphs":{
	"stanford-basic":{
	"edges":[{
	"source":1,
	"destination":0,
	"relation":"nsubj"
	},{
	"source":1,
	"destination":2,
	"relation":"dobj"
	},{
	"source":1,
	"destination":3,
	"relation":"punct"
	}],
	"roots":[1]
	},
	"stanford-collapsed":{
	"edges":[{
	"source":1,
	"destination":0,
	"relation":"nsubj"
	},{
	"source":1,
	"destination":2,
	"relation":"dobj"
	},{
	"source":1,
	"destination":3,
	"relation":"punct"
	}],
	"roots":[1]
	}
	}
	}]
	}
	},
	"mentions":[{
	"type":"TextBoundMention",
	"id":"T:648733472",
	"text":"Camilla",
	"labels":["Person","PossiblePerson","Entity"],
	"tokenInterval":{
	"start":2,
	"end":3
	},
	"characterStartOffset":14,
	"characterEndOffset":21,
	"sentence":0,
	"document":"-1180172198",
	"keep":true,
	"foundBy":"ner-person"
	},{
	"type":"EventMention",
	"id":"E:1351231268",
	"text":"Gonzo married Camilla",
	"labels":["Marry"],
	"trigger":{
	"type":"TextBoundMention",
	"id":"T:1627076846",
	"text":"married",
	"labels":["Marry"],
	"tokenInterval":{
	"start":1,
	"end":2
	},
	"characterStartOffset":6,
	"characterEndOffset":13,
	"sentence":0,
	"document":"-1180172198",
	"keep":true,
	"foundBy":"marry-syntax-1"
	},
	"arguments":{
	"spouse":[{
	"type":"TextBoundMention",
	"id":"T:1618195043",
	"text":"Gonzo",
	"labels":["Person","PossiblePerson","Entity"],
	"tokenInterval":{
	"start":0,
	"end":1
	},
	"characterStartOffset":0,
	"characterEndOffset":5,
	"sentence":0,
	"document":"-1180172198",
	"keep":true,
	"foundBy":"ner-person"
	},{
	"type":"TextBoundMention",
	"id":"T:648733472",
	"text":"Camilla",
	"labels":["Person","PossiblePerson","Entity"],
	"tokenInterval":{
	"start":2,
	"end":3
	},
	"characterStartOffset":14,
	"characterEndOffset":21,
	"sentence":0,
	"document":"-1180172198",
	"keep":true,
	"foundBy":"ner-person"
	}]
	},
	"paths":{
	"spouse":{
	"T:1618195043":[{
	"source":1,
	"destination":0,
	"relation":"nsubj"
	}],
	"T:648733472":[{
	"source":1,
	"destination":2,
	"relation":"dobj"
	}]
	}
	},
	"tokenInterval":{
	"start":0,
	"end":3
	},
	"characterStartOffset":0,
	"characterEndOffset":21,
	"sentence":0,
	"document":"-1180172198",
	"keep":true,
	"foundBy":"marry-syntax-1"
	},{
	"type":"TextBoundMention",
	"id":"T:1618195043",
	"text":"Gonzo",
	"labels":["Person","PossiblePerson","Entity"],
	"tokenInterval":{
	"start":0,
	"end":1
	},
	"characterStartOffset":0,
	"characterEndOffset":5,
	"sentence":0,
	"document":"-1180172198",
	"keep":true,
	"foundBy":"ner-person"
	}]
	}