;; Convert PDF files to text for further parsing.
(ns pdf-text-extractor.core
(:import [com.snowtide.pdf OutputTarget PDFTextStream]))
(defn pdf-to-text
  "Extracts the text content of a PDF and returns it as a String.

  `file` is passed unchanged to the PDFTextStream constructor; `-main` calls
  this with its command-line arguments. The PDFTextStream is closed before
  returning, including when extraction throws."
  [file]
  (let [pdfts  (new PDFTextStream file)
        output (new StringBuilder 1024)]
    (try
      ;; Pipe the document's text into the StringBuilder via OutputTarget.
      (.pipe pdfts (new OutputTarget output))
      ;; The accumulated text is the function's value. Ending on `.close`
      ;; would hand callers the result of `.close` instead of the text.
      (.toString output)
      (finally
        ;; Always release the underlying PDF, even if `.pipe` failed.
        (.close pdfts)))))
(defn -main
  "Command-line entry point.

  Calls `pdf-to-text` on every argument, appends a dashed separator line after
  each result (including the last one), and prints the concatenation with a
  single `println`."
  [& args]
  (let [separator "\n------------------------------------\n"
        sections  (for [path args]
                    (str (pdf-to-text path) separator))]
    (println (apply str sections))))
;; Leiningen project definition for the pdf-text-extractor command-line tool.
(defproject pdf-text-extractor "0.1.0-SNAPSHOT"
  ;; Replaces the template placeholder with the project's actual purpose.
  :description "Convert PDF files to text for further parsing"
  ;; NOTE(review): project URL is empty -- fill in or remove.
  :url ""
  ;; NOTE(review): license URL is empty -- fill in the license text location.
  :license {:name "Eclipse Public License"
            :url ""}
  :dependencies [[org.clojure/clojure "1.5.1"]
                 ;; Provides com.snowtide.pdf.PDFTextStream / OutputTarget.
                 [com.snowtide/pdftextstream "2.6.4"]]
  ;; NOTE(review): the repository URL is empty; presumably the pdftextstream
  ;; artifact is meant to be resolved from this repository, which cannot work
  ;; until the URL is supplied -- confirm against the vendor's documentation.
  :repositories [["snowtide-releases" ""]]
  ;; Namespace whose -main function is the program entry point.
  :main pdf-text-extractor.core)
