Skip to content

Instantly share code, notes, and snippets.

@beala
Created August 4, 2012 03:55
Show Gist options
  • Save beala/3254234 to your computer and use it in GitHub Desktop.
Save beala/3254234 to your computer and use it in GitHub Desktop.
n-gram
// Some Scala implementations of concepts from "Natural Language Processing For The Working Programmer"
// http://nlpwp.org/book/chap-ngrams.xhtml
/**
 * Return all lists of n adjacent words.
 * ngram("I love Scala!", 2) = List(List(I, love), List(love, Scala!))
 *
 * The sentence is split on the space character; empty tokens (produced by an
 * empty sentence or by leading/repeated spaces) are not treated as words.
 *
 * @param sent the sentence to tokenize
 * @param n    the number of adjacent words in each n-gram
 * @return every window of exactly `n` consecutive words, in sentence order;
 *         `Nil` when `n` is not positive or the sentence has fewer than `n` words
 */
def ngram(sent: String, n: Int): List[List[String]] = {
  val words = sent.split(' ').filter(_.nonEmpty).toList
  // `sliding` rejects non-positive sizes and emits one short window when the
  // input is shorter than the window, so both cases are ruled out up front.
  // The length is computed once here instead of at every step.
  if (n <= 0 || words.length < n) Nil
  else words.sliding(n).toList
}
// Demo: print the bigrams (n = 2) of a sample sentence.
println(ngram(sent = "I love Scala!", n = 2))
/**
 * Return all pairs of words where the first word comes before the second word.
 * skipBigram("I love Scala!") == List(List(I, love), List(I, Scala!), List(love, Scala!))
 *
 * The sentence is split on the space character; empty tokens (produced by an
 * empty sentence or by leading/repeated spaces) are not treated as words.
 *
 * @param sent the sentence to tokenize
 * @return every two-element list `List(earlier, later)`, ordered by the position
 *         of the earlier word and then by the position of the later word;
 *         `Nil` when the sentence has fewer than two words
 */
def skipBigram(sent: String): List[List[String]] = {
  val words = sent.split(' ').filter(_.nonEmpty).toList
  // `tails` yields each suffix of the word list in order; the head of a suffix
  // is paired with every word after it. The final, empty suffix adds nothing.
  words.tails.toList.flatMap {
    case first :: rest => rest.map(later => List(first, later))
    case Nil           => Nil
  }
}
// Demo: print every ordered word pair of a sample sentence.
println(skipBigram(sent = "I love Scala!"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment