Skip to content

Instantly share code, notes, and snippets.

@prassee
Created September 5, 2013 04:26
Show Gist options
  • Save prassee/6446101 to your computer and use it in GitHub Desktop.
Save prassee/6446101 to your computer and use it in GitHub Desktop.
chalk nlp
package nlpapp
import chalk.tools.sentdetect.SentenceDetectorME
import chalk.tools.sentdetect.SentenceModel
import java.io.FileInputStream
import chalk.tools.postag.POSModel
import chalk.tools.postag.POSTaggerME
import chalk.tools.tokenize.TokenizerModel
import chalk.tools.tokenize.TokenizerME
/**
 * Small demo application: splits a hard-coded text into sentences and prints
 * the part-of-speech tagging of the second sentence.
 *
 * The sentence and POS models are looked up on the classpath as
 * `/en-sent.bin` and `/en-pos-maxent.bin`.
 */
object WordTagger extends App {
  val tweetString = "CSSCorp is a global company. " +
    "The company's labs entity is providing services in Cloud and BigData Technologies."

  // Small factory functions wrapping the library constructors.
  val fis = (x: String) => new FileInputStream(x)
  val model = (x: FileInputStream) => new SentenceModel(x)
  val detector = (x: SentenceModel) => new SentenceDetectorME(x)
  val tokenModel = (x: FileInputStream) => new TokenizerModel(x)
  val tokenDetector = (x: TokenizerModel) => new TokenizerME(x)
  val posmodel = (x: FileInputStream) => new POSModel(x)
  val posdetector = (x: POSModel) => new POSTaggerME(x)

  /**
   * Resolves a classpath resource to a file-system path.
   *
   * The URL is converted through `toURI` so that URL-escaped characters
   * (e.g. `%20` for a space) are decoded; `URL.getPath` alone would hand the
   * escaped form to `FileInputStream` and the file would not be found.
   *
   * @param name absolute classpath resource name, e.g. `/en-sent.bin`
   * @return the path of the file backing the resource
   * @throws IllegalArgumentException if the resource is missing, or if its URL
   *                                  does not denote a plain file
   */
  private def resourcePath(name: String): String = {
    val url = getClass.getResource(name)
    require(url != null, "Model resource not found on classpath: " + name)
    new java.io.File(url.toURI).getPath
  }

  /**
   * Opens the named classpath resource, passes the stream to `load` and always
   * closes the stream afterwards, whether or not loading succeeded.
   *
   * @param name absolute classpath resource name of the model file
   * @param load function building a model from the opened stream
   * @return whatever `load` produced
   */
  private def loadModel[M](name: String)(load: FileInputStream => M): M = {
    val in = fis(resourcePath(name))
    try load(in) finally in.close()
  }

  /**
   * Builds a sentence splitter backed by the `/en-sent.bin` model.
   *
   * Note: this is a `def`, so the model is re-read on every access.
   *
   * @return a function that splits a text into its sentences
   */
  def splitSentence = {
    val senDet = detector(loadModel("/en-sent.bin")(model))
    (x: String) => senDet.sentDetect(x)
  }

  val sentences = splitSentence(tweetString)
  // Index 1 is the second sentence of tweetString; only that sentence is tagged.
  val postagr = posdetector(loadModel("/en-pos-maxent.bin")(posmodel)).tag(sentences(1))
  println(postagr)
  // output
  // The/DT company's/NNS labs/NNS entity/NN is/VBZ providing/VBG services/NNS in/IN Cloud/NNP and/CC BigData/NNP Technologies./NNP
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment