Skip to content

Instantly share code, notes, and snippets.

@reyman
Last active February 20, 2019 17:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reyman/91d528ca292c777701c7605e84550411 to your computer and use it in GitHub Desktop.
Save reyman/91d528ca292c777701c7605e84550411 to your computer and use it in GitHub Desktop.
import gate._
import gate.creole._
import gate.util.persistence._
import gate.corpora._
import scala.collection.JavaConverters._
import scala.collection.mutable.HashSet
/** Prototype that loads the `twitie-english-only.gapp` application from the GATE `twitter`
  * plugin, runs it over a sample tweet and prints a selection of the resulting annotations.
  *
  * The object body is executed top to bottom when the program starts (via `App`).
  */
object TwitterPrototype extends App {

  // Initialise GATE before declaring or registering any plugin.
  Gate.init()

  val anniePlugin = new Plugin.Maven("uk.ac.gate.plugins", "annie", "8.6-SNAPSHOT")
  val annieFrenchPlugin = new Plugin.Maven("uk.ac.gate.plugins", "lang-french", "8.6-SNAPSHOT")
  val twitterJsonPlugin = new Plugin.Maven("uk.ac.gate.plugins", "format-twitter", "8.6-SNAPSHOT")
  val twitterPlugin = new Plugin.Maven("uk.ac.gate.plugins", "twitter", "8.6-SNAPSHOT")

  // The ANNIE and French plugins are declared above but their registration is commented out.
  //Gate.getCreoleRegister.registerPlugin(anniePlugin)
  //Gate.getCreoleRegister.registerPlugin(annieFrenchPlugin)
  Gate.getCreoleRegister.registerPlugin(twitterJsonPlugin)
  Gate.getCreoleRegister.registerPlugin(twitterPlugin)

  // Location of the saved application inside the twitter plugin.
  val resourceDef = new ResourceReference(twitterPlugin, "resources/twitie-english-only.gapp").toURL

  // Check the loaded object's type explicitly so that an unexpected application type fails
  // with a descriptive message instead of a bare ClassCastException.
  val controller: ConditionalSerialAnalyserController =
    PersistenceManager.loadObjectFromUrl(resourceDef) match {
      case loaded: ConditionalSerialAnalyserController => loaded
      case other =>
        throw new IllegalStateException(
          s"Expected a ConditionalSerialAnalyserController at $resourceDef but loaded: $other")
    }
  //val annieController= Factory.createResource("gate.creole.SerialAnalyserController", Factory.newFeatureMap(), Factory.newFeatureMap(), "ANNIE").asInstanceOf[SerialAnalyserController]

  /** Prints the names of the processing resources held by `controller`, comma-separated.
    *
    * @param controller the controller whose processing resources are listed
    */
  def infoPRS(controller: CorpusController): Unit = {
    val prNames = controller.getPRs().asScala.map(_.getName())
    println(prNames.mkString(","))
  }

  infoPRS(controller)

  /** Runs `controller` over a single in-memory document built from `text` and prints the
    * annotations selected by [[processResults]].
    *
    * The temporary document and corpus are always handed back to `Factory.deleteResource`,
    * even when execution or result processing throws.
    *
    * @param controller the controller to execute
    * @param text       the raw text to analyse
    */
  def processString(controller: CorpusController, text: String): Unit = {
    val corpus = Factory.newCorpus("TwitIE Corpus")
    try {
      val doc = Factory.newDocument(text)
      try {
        corpus.add(doc)
        controller.setCorpus(corpus)
        controller.execute()
        processResults(doc, corpus)
      } finally {
        Factory.deleteResource(doc)
      }
    } finally {
      Factory.deleteResource(corpus)
    }
  }

  /** Prints one `Type = <type> = <covered text>` line for every annotation of a selected
    * type, for each document in `corpus`.
    *
    * The covered text of an annotation is resolved against the document that owns the
    * annotation, so the output stays correct when the corpus holds several documents.
    *
    * @param doc    retained for backward compatibility with existing callers; no longer read,
    *               because each annotation is resolved against its own corpus document
    * @param corpus the corpus whose documents are inspected
    */
  def processResults(doc: Document, corpus: Corpus): Unit = {
    val annotationTypesRequired =
      Set("Person", "Location", "Organization", "UserID", "Emoticon", "Hashtag").asJava

    // foreach rather than map: the traversal exists only for its println side effect,
    // and a discarded map over a lazy sequence may not visit every element.
    corpus.asScala.foreach { document =>
      // NOTE(review): this flag is set after the pipeline has already run; presumably it
      // does not influence the text printed below - confirm whether it is still needed.
      document.setPreserveOriginalContent(true)
      document.getAnnotations().get(annotationTypesRequired).asScala.foreach { annotation =>
        println(s"Type = ${annotation.getType()} = ${Utils.stringFor(document, annotation)}")
      }
    }
  }

  processString(controller, "Hi @seb :) , i'm really happy to see you with @paul and @axt in #paris #eiffeltower ! ")
}
@ianroberts
Copy link

ianroberts commented Feb 20, 2019

Nice, just a comment on lines 43-44 - don't use new DocumentImpl and setContent, instead use Factory.newDocument(text), and then make sure you pass both the document and the corpus to Factory.deleteResource before the method returns.

@reyman
Copy link
Author

reyman commented Feb 20, 2019

Thanks @ianroberts, code corrected :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment