Skip to content

Instantly share code, notes, and snippets.

Created July 22, 2012 21:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/3161132 to your computer and use it in GitHub Desktop.
Save anonymous/3161132 to your computer and use it in GitHub Desktop.
(ns hook.core
(:require hook.image :as image)
(:use hook.util
(seesaw core
[chooser :only [choose-file]]
[graphics :only [draw style anti-alias circle rect]]
[color :only [color]])
clojure.xml
clojure.math.numeric-tower
[clojure.zip :only [xml-zip]]
[clojure.data.zip.xml :only [xml1-> attr]]))
(defn page-width [xl]
(Double. (xml1-> (xml-zip xl) :page (attr :width))))
(defn page-height [xl]
(Double. (xml1-> (xml-zip xl) :page (attr :height))))
(defn page-skew [xl]
(Double. (xml1-> (xml-zip xl) :page (attr :skewAngle))))
(defn char-rect [xl]
(let [x (Integer. (:l (:attrs xl)))
y (Integer. (:t (:attrs xl)))
w (abs (- x (Integer. (:r (:attrs xl)))))
h (abs (- y (Integer. (:b (:attrs xl)))))]
{:x x :y y :width w :height h}))
(defn ocr-chars
""
([{img :image mta :meta}] (ocr-chars img mta))
([img mta]
(let [height (page-height mta)
skew (- (page-skew mta))
timg (image/rotate img skew 0 height)]
(for [e (xml-seq xl) :when (= :charParams (:tag e))]
{:image (image/subimage timg (char-rect e))
:character (:first (:content e))
:confidence (:charConfidence (:attrs e))}))))
(defn annotate-chars! [img xl]
(let [g (anti-alias (.createGraphics img))]
(.rotate g (page-skew xl) 0 (page-height xl))
(doseq [e (xml-seq xl) :when (= :charParams (:tag e))]
(draw g (apply rect (vals (char-rect e))) (style :foreground (color :black))))))
(defn make-image []
(let [img (image/load "page.tif")]
(scrollable (label :icon (:image (first (ocr-chars img (parse (file "annot.xml")))))))))
(defn make-frame []
(frame
:title "hook"
:size [600 :by 600]
:on-close :exit
:content (make-image)))
(defn image-meta-pairs
"Extracts pairs of images and their describing xml files."
[dir]
(let [file-pairs (vals (group-by file-name (file-seq dir)))]
(map #({:img (first (filter img? %))
:meta (first (filter meta? %))})
file-pairs)))
(defn confidence-examples [src dest]
(let [all (apply concat (map ocr-chars (image-meta-pairs src)))
groups (group-by #(select-keys % [:character :confidence]) all)
dst-pth (.getName dst)]
(for [grp groups :let [els (second g)
chr (:character (first g))
cnf (:confidence (first g))]]
(for [e els i (range)]
(image/store (:image e) (str dst-pth "/" chr "/" cnf "/" i ".tiff"))))))
(defn choose-dest [_ src]
(choose-file :selection-mode :dirs-only
:success-fn (fn [_ dst] (confidence-examples src dst))
:cancel-fn (fn [_] (System/exit 0))))
(defn choose-source []
(choose-file :selection-mode :dirs-only
:success-fn choose-dest
:cancel-fn (fn [_] (System/exit 0))))
(defn -main [& args]
(invoke-later (choose-source)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment