veekaybee/simpleScaldingJob.sc

## simpleScaldingJob.sc
import com.twitter.scalding._

/**
 * Word-count job over `posts.txt`.
 *
 * Reads the file line by line, splits every line into normalized words via
 * [[tokenize]], counts the occurrences of each word, and hands the ten words
 * with the highest counts to `dump`.
 *
 * @param args job arguments, passed straight through to [[Job]]
 */
class WordCountJob(args: Args) extends Job(args) {

  val lines: TypedPipe[String] = TypedPipe.from(TextLine("posts.txt"))

  lines
    .flatMap { line => tokenize(line) }
    .groupBy { word => word }
    .size
    // Gather every (word, count) pair under one key so the sort below ranks all words together.
    .groupAll
    // Negating the count makes the ascending sort yield the most frequent words first.
    .sortBy { case (_, count) => -count }
    .take(10)
    .dump

  /**
   * Splits a piece of text into lowercase words with punctuation removed.
   *
   * Every character that is not an ASCII letter, an ASCII digit, or whitespace
   * is dropped, and the remainder is split on runs of whitespace. Empty tokens
   * are discarded, so blank lines and lines with leading whitespace do not
   * contribute an empty "word" to the counts.
   *
   * @param text the raw text to split
   * @return the normalized, non-empty words in order of appearance; empty when
   *         `text` contains no letters or digits
   */
  def tokenize(text: String): Array[String] =
    text
      // Locale.ROOT keeps lowercasing independent of the JVM default locale
      // (e.g. a Turkish default would map 'I' to dotless 'ı', which is then stripped).
      .toLowerCase(java.util.Locale.ROOT)
      .replaceAll("[^a-zA-Z0-9\\s]", "")
      .split("\\s+")
      // String.split keeps a leading empty string (and returns Array("") for ""), so filter those out.
      .filter(_.nonEmpty)
}
	import com.twitter.scalding._

	/**
	 * Word-count job over `posts.txt`.
	 *
	 * Reads the file line by line, splits every line into normalized words via
	 * [[tokenize]], counts the occurrences of each word, and hands the ten words
	 * with the highest counts to `dump`.
	 *
	 * @param args job arguments, passed straight through to [[Job]]
	 */
	class WordCountJob(args: Args) extends Job(args) {

	  val lines: TypedPipe[String] = TypedPipe.from(TextLine("posts.txt"))

	  lines
	    .flatMap { line => tokenize(line) }
	    .groupBy { word => word }
	    .size
	    // Gather every (word, count) pair under one key so the sort below ranks all words together.
	    .groupAll
	    // Negating the count makes the ascending sort yield the most frequent words first.
	    .sortBy { case (_, count) => -count }
	    .take(10)
	    .dump

	  /**
	   * Splits a piece of text into lowercase words with punctuation removed.
	   *
	   * Every character that is not an ASCII letter, an ASCII digit, or whitespace
	   * is dropped, and the remainder is split on runs of whitespace. Empty tokens
	   * are discarded, so blank lines and lines with leading whitespace do not
	   * contribute an empty "word" to the counts.
	   *
	   * @param text the raw text to split
	   * @return the normalized, non-empty words in order of appearance; empty when
	   *         `text` contains no letters or digits
	   */
	  def tokenize(text: String): Array[String] =
	    text
	      // Locale.ROOT keeps lowercasing independent of the JVM default locale
	      // (e.g. a Turkish default would map 'I' to dotless 'ı', which is then stripped).
	      .toLowerCase(java.util.Locale.ROOT)
	      .replaceAll("[^a-zA-Z0-9\\s]", "")
	      .split("\\s+")
	      // String.split keeps a leading empty string (and returns Array("") for ""), so filter those out.
	      .filter(_.nonEmpty)
	}