Skip to content

Instantly share code, notes, and snippets.

@kumanan
Created November 7, 2011 03:04
Show Gist options
  • Save kumanan/1344093 to your computer and use it in GitHub Desktop.
Save kumanan/1344093 to your computer and use it in GitHub Desktop.
Scored word generation
// Annotation pipeline that toScoredWords runs each sentence through.
// Built once and reused. The stages are passed to AnnotationPipeline in the
// order listed below; presumably that is also the order in which they run --
// TODO confirm against AnnotationPipeline before reordering anything.
val PIPELINE = new AnnotationPipeline(
  new PtbRegexTokenizer(),
  new StanfordMaxentTagger(),
  new StanfordNameTagger(),
  new HyphenWordExtractorAnnotator(),
  // The multi-word-expression annotator is the only stage configured with the glossary.
  new MweExtractorAnnotator(GLOSSARY_WORDS),
  new SimplePatternAnnotator()
)
/**
 * Produces the scored words for a single annotated sentence.
 *
 * The sentence is converted with `toAnnoSentence`, tagged with `md`, and run
 * through `PIPELINE`. `ScoredWordExtractor.getScoredWords` is then asked for
 * scored words over the "tokens", "hyphenword", "ne" and "mwe" annotations,
 * using `GLOSSARY_WORDS` and a fresh `ScoredWordRanker`. Every extracted item is
 * converted with `toScoredWord` and stamped with the id of `as`.
 *
 * @param md metadata attached to the converted sentence before processing
 * @param as the sentence to process; its id is copied onto every returned word
 * @return a newly allocated `java.util.ArrayList` holding the scored words, in
 *         the order the extractor yielded them
 */
def toScoredWords(md : Metadata , as : AnnotatedSentence) : java.util.List[ScoredWord] = {
  val sentence = toAnnoSentence(as)
  sentence.setMetadata(md)
  PIPELINE.process(sentence)

  // Annotation names handed to the extractor.
  val annotationNames = List("tokens", "hyphenword", "ne", "mwe")
  // NOTE(review): the meaning of the boolean flag is not visible from here --
  // check ScoredWordExtractor.getScoredWords before changing it.
  val extracted = ScoredWordExtractor.getScoredWords(
    sentence, annotationNames, Some(GLOSSARY_WORDS), true, new ScoredWordRanker)

  // A Java list is built directly because the declared return type is java.util.List.
  val result = new ArrayList[ScoredWord]
  for (candidate <- extracted) {
    val word = toScoredWord(candidate)
    word.setSentenceId(as.getId)
    result.add(word)
  }
  result
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment