Skip to content

Instantly share code, notes, and snippets.

@sylvia43
Created December 2, 2015 17:54
Show Gist options
  • Save sylvia43/bdd4b7f66e85cda92fde to your computer and use it in GitHub Desktop.
Save sylvia43/bdd4b7f66e85cda92fde to your computer and use it in GitHub Desktop.
Count the number of words in all Word documents in a directory.
import java.io.{File, FileInputStream, FilenameFilter}
import org.apache.poi.xwpf.extractor.XWPFWordExtractor
import org.apache.poi.xwpf.usermodel.XWPFDocument
/** Counts the words contained in all Word (`.docx`) documents of a directory. */
object Main {

  /** Folder scanned when no command-line argument is supplied. */
  private val DefaultFolder = "E:\\SchoolWork\\Running Start"

  /**
    * Prints the total word count of the `.docx` files in a folder.
    *
    * @param args optional; `args(0)` is the folder to scan. When absent,
    *             [[DefaultFolder]] is used.
    */
  def main(args: Array[String]): Unit = {
    val folderName = args.headOption.getOrElse(DefaultFolder)
    println(getCountFromFolder(folderName))
  }

  /**
    * Sums the word counts of every `.docx` file directly inside `folderName`
    * (sub-directories are not traversed).
    *
    * @param folderName path of the directory to scan
    * @return the total word count, or 0 when the path does not denote a
    *         readable directory or contains no matching files
    */
  def getCountFromFolder(folderName: String): Int = {
    val docxFilter = new FilenameFilter {
      override def accept(dir: File, name: String): Boolean =
        name.endsWith(".docx") &&
          // Word keeps a "~$name.docx" owner file next to an open document;
          // it is not a real document and cannot be parsed.
          !name.startsWith("~$") &&
          // A directory may also carry a ".docx" name; only regular files can be opened.
          new File(dir, name).isFile
    }

    // listFiles returns null (not an empty array) when the path is missing
    // or is not a directory, so wrap the result before using it.
    Option(new File(folderName).listFiles(docxFilter))
      .map(files => files.map(getCountFromFile).sum)
      .getOrElse(0)
  }

  /**
    * Counts the words in a single `.docx` file.
    *
    * The extracted text is split on runs of non-word characters (`\W+`);
    * every non-empty token counts as one word.
    *
    * @param file the document to read
    * @return the number of words in the document's extracted text
    * @throws java.io.IOException if the file cannot be opened or read
    */
  def getCountFromFile(file: File): Int = {
    val stream = new FileInputStream(file)
    try {
      val extractor = new XWPFWordExtractor(new XWPFDocument(stream))
      try {
        // split yields an empty first token when the text is empty or starts
        // with a non-word character; do not count it as a word.
        extractor.getText.split("\\W+").count(_.nonEmpty)
      } finally {
        extractor.close()
      }
    } finally {
      // Always release the file handle, even if parsing fails.
      stream.close()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment