Created
July 22, 2012 21:53
-
-
Save anonymous/3161132 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns hook.core | |
(:require hook.image :as image) | |
(:use hook.util | |
(seesaw core | |
[chooser :only [choose-file]] | |
[graphics :only [draw style anti-alias circle rect]] | |
[color :only [color]]) | |
clojure.xml | |
clojure.math.numeric-tower | |
[clojure.zip :only [xml-zip]] | |
[clojure.data.zip.xml :only [xml1-> attr]])) | |
(defn page-width [xl] | |
(Double. (xml1-> (xml-zip xl) :page (attr :width)))) | |
(defn page-height [xl] | |
(Double. (xml1-> (xml-zip xl) :page (attr :height)))) | |
(defn page-skew [xl] | |
(Double. (xml1-> (xml-zip xl) :page (attr :skewAngle)))) | |
(defn char-rect [xl] | |
(let [x (Integer. (:l (:attrs xl))) | |
y (Integer. (:t (:attrs xl))) | |
w (abs (- x (Integer. (:r (:attrs xl))))) | |
h (abs (- y (Integer. (:b (:attrs xl)))))] | |
{:x x :y y :width w :height h})) | |
(defn ocr-chars | |
"" | |
([{img :image mta :meta}] (ocr-chars img mta)) | |
([img mta] | |
(let [height (page-height mta) | |
skew (- (page-skew mta)) | |
timg (image/rotate img skew 0 height)] | |
(for [e (xml-seq xl) :when (= :charParams (:tag e))] | |
{:image (image/subimage timg (char-rect e)) | |
:character (:first (:content e)) | |
:confidence (:charConfidence (:attrs e))})))) | |
(defn annotate-chars! [img xl] | |
(let [g (anti-alias (.createGraphics img))] | |
(.rotate g (page-skew xl) 0 (page-height xl)) | |
(doseq [e (xml-seq xl) :when (= :charParams (:tag e))] | |
(draw g (apply rect (vals (char-rect e))) (style :foreground (color :black)))))) | |
(defn make-image [] | |
(let [img (image/load "page.tif")] | |
(scrollable (label :icon (:image (first (ocr-chars img (parse (file "annot.xml"))))))))) | |
(defn make-frame [] | |
(frame | |
:title "hook" | |
:size [600 :by 600] | |
:on-close :exit | |
:content (make-image))) | |
(defn image-meta-pairs | |
"Extracts pairs of images and their describing xml files." | |
[dir] | |
(let [file-pairs (vals (group-by file-name (file-seq dir)))] | |
(map #({:img (first (filter img? %)) | |
:meta (first (filter meta? %))}) | |
file-pairs))) | |
(defn confidence-examples [src dest] | |
(let [all (apply concat (map ocr-chars (image-meta-pairs src))) | |
groups (group-by #(select-keys % [:character :confidence]) all) | |
dst-pth (.getName dst)] | |
(for [grp groups :let [els (second g) | |
chr (:character (first g)) | |
cnf (:confidence (first g))]] | |
(for [e els i (range)] | |
(image/store (:image e) (str dst-pth "/" chr "/" cnf "/" i ".tiff")))))) | |
(defn choose-dest [_ src] | |
(choose-file :selection-mode :dirs-only | |
:success-fn (fn [_ dst] (confidence-examples src dst)) | |
:cancel-fn (fn [_] (System/exit 0)))) | |
(defn choose-source [] | |
(choose-file :selection-mode :dirs-only | |
:success-fn choose-dest | |
:cancel-fn (fn [_] (System/exit 0)))) | |
(defn -main [& args] | |
(invoke-later (choose-source))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment