Created
January 4, 2010 15:25
-
-
Save zcox/268580 to your computer and use it in GitHub Desktop.
Revised Scala version from this blog post: http://blogs.sourceallies.com/2009/12/word-counts-example-in-ruby-and-scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io._ | |
// Directory whose sub-directories are scanned for files to count words in.
val rootDir = new File("/home/zcox/dev/20_newsgroups")
if (!rootDir.exists) throw new IllegalArgumentException(rootDir + " does not exist")

// Running tally of lower-cased word -> occurrences; words not seen yet read as 0.
var counts = Map.empty[String, Int].withDefaultValue(0)
files(rootDir) { file =>
  // Read the file explicitly instead of relying on the implicit File => String view.
  // split can yield a leading empty token when the text starts with a non-word
  // character; an empty string is not a word, so it is skipped.
  for (word <- file2String(file).split("""\W+""") if word.nonEmpty) {
    val key = word.toLowerCase
    // `updated` returns a new immutable map and keeps the default value of 0.
    counts = counts.updated(key, counts(key) + 1)
  }
}

// One report ordered by descending count, one ordered alphabetically by word.
write(counts, "counts-descreasing-scala") {_._2 > _._2}
write(counts, "counts-alphabetical-scala") {_._1 < _._1}
/**
 * Writes the specified map to the specified file in tab-delimited format, sorted accordingly.
 *
 * Each entry becomes one line of the form `key<TAB>value`.
 *
 * @param map  the entries to write
 * @param file path of the output file, opened with a FileWriter
 * @param sort "comes before" predicate: returns true when the first entry must be
 *             written ahead of the second
 */
def write[K, V](map: Map[K, V], file: String)(sort: (Tuple2[K, V], Tuple2[K, V]) => Boolean): Unit = {
  using(new PrintWriter(new FileWriter(file))) { out =>
    // sortWith replaces List.sort, which no longer exists in the standard library.
    map.toList.sortWith(sort).foreach { case (key, value) =>
      out.println(key + "\t" + value)
    }
  }
}
/**
 * Visits every regular file located exactly one directory level below rootDir
 * and passes it to the function.
 *
 * Files directly inside rootDir and anything nested deeper are not visited.
 * The files are not opened here; `process` receives the File itself.
 *
 * @param rootDir directory whose immediate sub-directories are scanned
 * @param process callback invoked once per regular file found
 */
def files(rootDir: File)(process: File => Unit): Unit = {
  // File.listFiles returns null (not an empty array) when the path is not a
  // directory or cannot be read; treat that as "no children" instead of crashing.
  def children(dir: File): List[File] = Option(dir.listFiles).map(_.toList).getOrElse(Nil)

  for {
    dir <- children(rootDir) if dir.isDirectory
    file <- children(dir) if file.isFile
  } process(file)
}
/**
 * Converts a File to a String by reading its entire contents as ISO-8859-1 text.
 *
 * @param file the file to read
 * @return the full contents of the file
 */
implicit def file2String(file: File): String = {
  val source = scala.io.Source.fromFile(file, "ISO-8859-1")
  // Close the underlying reader so that reading many files does not leak handles.
  try source.mkString finally source.close()
}
/**
 * Hands the given resource to `getB` and returns whatever `getB` produces,
 * calling `close()` on the resource afterwards whether `getB` returned
 * normally or threw.
 *
 * @param closeable any object exposing a `close(): Unit` method
 * @param getB      the operation to run against the resource
 * @return the result of `getB(closeable)`
 */
def using[Closeable <: {def close(): Unit}, B](closeable: Closeable)(getB: Closeable => B): B =
  try getB(closeable) finally closeable.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment