; OCR with Clojure. See the blog post at rhnh.net.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns ocr.main | |
(:use [clojure.string :only (split trim)]) | |
(:use [clojure.contrib.duck-streams :only (write-lines read-lines)]) | |
(:use [clojure.contrib.shell-out :only (sh)]) | |
(:use [clojure.contrib.math :only (sqrt)])) | |
; Input handling
(defn read-text-image-line
  "Maps one pixel line of a text-format image to a bit string.
  The line is split on runs of commas, colons and whitespace; when the
  final token is the colour name white the result is \"0\", and for
  anything else it is \"1\"."
  [line]
  (let [colour-name (last (split line #"[,:\s]+"))]
    (if (= "white" colour-name)
      "0"
      "1")))
(defn load-text-image
  "Reads a black and white image saved in imagemagick's text format and
  returns it as a sequence of row strings, 32 characters per row, where
  '0' stands for a white pixel and '1' for a black one.
  The first line of the file is skipped."
  [filename]
  ;; The lines are realised eagerly (vec) before any further processing,
  ;; so the file has been read completely by the time this function returns
  ;; its lazy result. convert-image relies on that: it overwrites the very
  ;; file it asked us to read.
  (let [pixel-lines (vec (rest (read-lines filename)))]
    (->> pixel-lines
         (map read-text-image-line)
         (partition 32)
         (map (partial apply str)))))
(defn convert-image
  "Turns the image file `in` into the bitmap text format the classifier
  expects and stores it at `out`.
  First the external `convert` program writes a two-colour, normalised,
  32x32 rendition of `in` to `out`; then `out` is replaced by the rows
  produced by load-text-image for that same file."
  [in out]
  (let [convert-command ["convert" in "-colorspace" "gray" "+dither" "-colors" "2"
                         "-normalize" "-resize" "32x32!" out]]
    (apply sh convert-command))
  ;; load-text-image is evaluated (and reads `out`) before write-lines
  ;; starts writing to the same path.
  (let [bitmap-rows (load-text-image out)]
    (write-lines out bitmap-rows)))
; Path of the scratch file used by classify-image: convert-image writes the
; converted bitmap there and load-char-file then reads it back.
(def temp-outfile "/tmp/clj-converted.txt") | |
; Training Data
(defn parse-char-row
  "Extracts the bits from a row of text: every \"0\" or \"1\" character in
  `row` becomes the corresponding integer, and every other character is
  ignored. Returns the integers in their original order."
  [row]
  (->> (split row #"")
       (filter #{"0" "1"})
       (map #(Integer/parseInt %))))
(defn parse-char-data
  "Converts one training record (a sequence of text lines) into a
  [label pixels] pair. The label is the trimmed last line of the record;
  pixels is a single vector holding the bits of the first 32 lines,
  concatenated row after row."
  [element]
  (let [label (trim (last element))
        bitmap-rows (take 32 element)
        pixels (vec (mapcat parse-char-row bitmap-rows))]
    [label pixels]))
(defn load-training-data
  "Reads training records from `filename`, which is expected to be in the
  layout used by
  http://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
  The first 21 lines are skipped; the remaining lines are grouped into
  records of 33 lines each, and every record is turned into a
  [label pixels] pair by parse-char-data."
  [filename]
  (->> (read-lines filename)
       (drop 21)
       (partition 33)
       (map parse-char-data)))
; Labelled training examples used by classify, taken from the file
; "training-set.tra". load-training-data returns a lazy sequence (it ends in
; a map call), so this var is bound to that sequence when the namespace loads.
(def training-set (load-training-data "training-set.tra")) | |
; Classification
(defn load-char-file
  "Loads a bitmap file as a [label contents] pair.
  The label is the part of the file's name that precedes the first
  underscore or dot; contents are the 0/1 values found anywhere in the
  file, as returned by parse-char-row."
  [file]
  (let [label (first (split (.getName file) #"[_\.]"))
        bits (parse-char-row (slurp file))]
    [label bits]))
(defn minus-vector
  "Element-wise subtraction of the given collections: position i of the
  result is (- a_i b_i ...). The result is as long as the shortest
  argument. With a single collection every element is negated."
  [& args]
  ;; map with several collections already hands `-` one element from each,
  ;; so no intermediate tuples are needed.
  (apply map - args))
(defn sum-of-squares
  "Returns the sum of the squares of the numbers in `coll`.
  An empty collection yields 0."
  [coll]
  ;; The explicit initial value 0 matters: without it reduce would use the
  ;; first element itself (unsquared) as the starting accumulator, giving
  ;; e.g. 19 for [3 4] and -1 for [-1], and it would fail on empty input.
  (reduce (fn [total v] (+ total (* v v))) 0 coll))
(defn calculate-distances
  "Builds a function that measures how far a training row is from `in`.
  Both `in` and the row are treated as sequences whose first item is a
  label and whose last item is a numeric vector. The returned function
  yields [label distance], where label is the row's label and distance is
  the square root of sum-of-squares applied to the element-wise difference
  of the two vectors."
  [in]
  (fn [row]
    (let [deltas (minus-vector (last in) (last row))]
      [(first row) (sqrt (sum-of-squares deltas))])))
(defn classify
  "Classifies `in` with a k-nearest-neighbours vote over training-set
  (k is fixed at 10) and returns the winning label.
  Every training row is scored with calculate-distances, the k rows with
  the smallest distance are kept, and the label occurring most often among
  them is returned."
  [in]
  (let [k 10
        distance-to (calculate-distances in)
        votes (->> training-set
                   (map distance-to)
                   (sort-by last)
                   (take k)
                   (map first)
                   frequencies)]
    ;; Sort the label/count entries by count and take the label of the
    ;; final (highest-count) entry.
    (->> votes
         (sort-by second)
         last
         first)))
; Main functions
(defn classify-image
  "Classifies the image stored in `filename`: the image is first converted
  into temp-outfile by convert-image, then that file is loaded with
  load-char-file and handed to classify. Returns classify's result."
  [filename]
  (convert-image filename temp-outfile)
  (let [converted (java.io.File. temp-outfile)
        sample (load-char-file converted)]
    (classify sample)))
(defn -main
  "Entry point: treats every argument as an image file name, classifies it
  with classify-image and prints the guess on its own line."
  [& args]
  (doseq [image-path args]
    (let [guess (classify-image image-path)]
      (println "I think that is the number" guess))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment