Skip to content

Instantly share code, notes, and snippets.

@yawaramin
Last active April 22, 2016 18:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yawaramin/447a4a3fd6a3383a6cf7bf7babd4b0cb to your computer and use it in GitHub Desktop.
Save yawaramin/447a4a3fd6a3383a6cf7bf7babd4b0cb to your computer and use it in GitHub Desktop.
Asynchronous and streaming file word counter
import java.nio.file.{ Files, Path, Paths }
import resource._
import scala.collection.JavaConversions.iterableAsScalaIterable
import scala.concurrent.duration.Duration
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{ Await, Future }
/**
 * Asynchronous, streaming word counter for the files directly inside a
 * directory.
 *
 * Each file is counted in its own [[scala.concurrent.Future]]; the per-file
 * counts are then summed into a grand total, which `main` prints.
 */
object Main {
  /** An asynchronous function (inspired by Finagle). */
  type Async[A, B] = A => Future[B]

  /**
   * Lists the entries directly inside the directory named `dirName`.
   *
   * The directory stream is read to the end and closed inside the returned
   * future, so the result is a plain in-memory collection and an I/O error
   * (for example a missing directory) surfaces as a failed future.
   */
  val listFiles: Async[String, Traversable[Path]] = dirName =>
    Future {
      // Local import: provides the explicit `.asScala` conversion without
      // touching the file-level import block.
      import scala.collection.JavaConverters._

      val stream = Files.newDirectoryStream(Paths.get(dirName))
      try stream.asScala.toList
      finally stream.close()
    }

  /**
   * Counts the whitespace-separated words in `line`.
   *
   * Blank lines count as zero words, and leading, trailing or repeated
   * whitespace never produces phantom empty words.
   */
  def lineWordCount(line: String): Int =
    // Splitting can still yield one empty leading token (blank line or
    // leading whitespace), so count only the non-empty pieces.
    line.split("\\s+").count(_.nonEmpty)

  /**
   * Counts the words in the file at `path`.
   *
   * As a side effect, prints the file name, its word count and the elapsed
   * time in milliseconds. Lines are consumed one at a time from the source
   * iterator, and the file is closed before the future completes.
   */
  val fileWordsCount: Async[Path, Int] = path =>
    Future {
      val fileName = path.toString
      val startTime = System.nanoTime
      val source = scala.io.Source.fromFile(fileName)
      val wordsCount =
        try source.getLines().map(lineWordCount).sum
        finally source.close()
      // Nanoseconds -> milliseconds.
      val elapsedTime = (System.nanoTime - startTime).toDouble / 1000000

      println(
        s"File: $fileName, word count: $wordsCount, elapsed time: $elapsedTime ms")
      wordsCount
    }

  /**
   * Runs the word count app.
   *
   * Usage:
   * {{{
   * sbt> run DIRNAME
   * }}}
   *
   * Only regular files directly inside DIRNAME are counted; subdirectories
   * and other non-file entries are skipped. When DIRNAME is missing, prints
   * a usage message to standard error and exits with status 1.
   */
  def main(args: Array[String]): Unit =
    args.headOption match {
      case None =>
        Console.err.println("Usage: run DIRNAME")
        sys.exit(1)

      case Some(dirName) =>
        val wordsCountFuture =
          listFiles(dirName) flatMap { entries =>
            val files = entries.toList.filter(entry => Files.isRegularFile(entry))
            // `traverse` calls `fileWordsCount` for every file while it
            // builds the combined future, so the files are counted
            // concurrently; an empty directory yields a total of 0.
            Future.traverse(files)(fileWordsCount).map(_.sum)
          }

        // Blocking is acceptable here: this is the edge of the program.
        println(Await.result(wordsCountFuture, Duration.Inf))
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment