Skip to content

Instantly share code, notes, and snippets.

@mattdeboard
Created June 21, 2013 04:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattdeboard/5828696 to your computer and use it in GitHub Desktop.
Save mattdeboard/5828696 to your computer and use it in GitHub Desktop.
(defn ExtractRenderListener
  "Returns a RenderListener that writes every image it is asked to render
  to disk.

  `path` is used as a plain string prefix: each file is named
  <path><reference-number>.<file-type>, so include a trailing separator
  when `path` is meant to be a directory.

  When the render info yields no image object, or no reference to take a
  number from, the image is skipped instead of throwing. The text-related
  callbacks do nothing."
  [^String path]
  (reify RenderListener
    (renderImage [_ renderInfo]
      (let [image     (.getImage renderInfo)
            image-ref (.getRef renderInfo)]
        ;; Check both objects BEFORE calling methods on them. Reading the
        ;; file type / reference number first would throw a
        ;; NullPointerException and make the nil check unreachable.
        ;; NOTE(review): a nil reference presumably means an inline image;
        ;; confirm against the PDF library before relying on that.
        (when (and image image-ref)
          (let [filename (str path (.getNumber image-ref) "." (.getFileType image))]
            ;; with-open closes the stream on exit, which also flushes it.
            (with-open [os (clojure.java.io/output-stream filename)]
              (.write os (.getImageAsBytes image)))))))
    (beginTextBlock [_] nil)
    (endTextBlock [_] nil)
    (renderText [_ renderInfo] nil)))
(defn MatrixRenderListener
  "Returns a RenderListener that records coordinate data for every image it
  is asked to render.

  For each image, entries 6, 7, 0 and 4 of the image's CTM are collected as
  [x y w h] and appended, inside a transaction, to the collection stored in
  the `coords` ref under the key for `page` (the page number converted to a
  keyword, e.g. :3). A page that has no entry yet starts from an empty
  vector, so entries are kept in render order. The text-related callbacks
  do nothing."
  [^Integer page]
  ;; The key depends only on `page`, so compute it once rather than on
  ;; every image callback.
  (let [pagekey (keyword (str page))]
    (reify RenderListener
      (renderImage [_ renderInfo]
        ;; NOTE(review): indices 6/7 are presumably the translation and 0/4
        ;; the scale components of the matrix; confirm against the PDF
        ;; library's Matrix layout.
        (let [matrix (.getImageCTM renderInfo)
              x      (.get matrix 6)
              y      (.get matrix 7)
              w      (.get matrix 0)
              h      (.get matrix 4)]
          ;; fnil supplies [] when the page key is absent. A bare conj onto
          ;; nil would create a list and prepend later entries.
          (dosync
            (alter coords update-in [pagekey] (fnil conj []) [x y w h])))
        nil)
      (beginTextBlock [_] nil)
      (endTextBlock [_] nil)
      (renderText [_ renderInfo] nil))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment