Skip to content

Instantly share code, notes, and snippets.

@ckirkendall
Created December 8, 2014 23:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ckirkendall/d76f025f1a205f5dacc9 to your computer and use it in GitHub Desktop.
Save ckirkendall/d76f025f1a205f5dacc9 to your computer and use it in GitHub Desktop.
(ns ocr-kata.core
(:require [clojure.java.io :refer [writer reader resource]]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; We treat this OCR parser like a language parser: the input
;; is parsed into an AST that is transformed and tagged by
;; different analysis steps; the AST is then passed to an
;; emitter.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def numbers
  "Lookup table from a glyph's nine cells to the digit it depicts.
  Each key holds nine 0/1 cells. The patterns below are listed in digit
  order, so a pattern's position in the vector is the digit it maps to.
  Patterns that are not listed look up as nil."
  (zipmap [[0 1 0 1 0 1 1 1 1]   ; 0
           [0 0 0 0 0 1 0 0 1]   ; 1
           [0 1 0 0 1 1 1 1 0]   ; 2
           [0 1 0 0 1 1 0 1 1]   ; 3
           [0 0 0 1 1 1 0 0 1]   ; 4
           [0 1 0 1 1 0 0 1 1]   ; 5
           [0 1 0 1 1 0 1 1 1]   ; 6
           [0 1 0 0 0 1 0 0 1]   ; 7
           [0 1 0 1 1 1 1 1 1]   ; 8
           [0 1 0 1 1 1 0 1 1]]  ; 9
          (range 10)))
(defn log-output
  "Debug helper: prints every argument on one line via println and
  returns the final argument (nil when called with no arguments)."
  [& out]
  (let [result (last out)]
    (apply println out)
    result))
(defn line-to-bin
  "Turns one line of text into a sequence of 3-cell groups.
  Every character becomes 0 when it is a space and 1 otherwise; the
  cells are then grouped in threes. A trailing group with fewer than
  three cells is dropped."
  [line]
  (->> line
       (map (fn [ch] (if (= ch \space) 0 1)))
       (partition 3)))
(defn check-sum
  "Returns true when the weighted digit sum of nums is divisible by 11.
  The last digit gets weight 1, the one before it weight 2, and so on up
  to weight 9; digits beyond the ninth from the end are ignored."
  [nums]
  (let [weights  (range 1 10)
        weighted (map * (reverse nums) weights)
        total    (apply + weighted)]
    (zero? (mod total 11))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; base tagging transforms
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defn tag-numbers
  "Adds :nums to data: each entry of :bin looked up in the numbers
  table, giving a digit or nil when the pattern is not in the table."
  [{:keys [bin] :as data}]
  (let [digits (map #(get numbers %) bin)]
    (assoc data :nums digits)))
(defn tag-check-sum
  "Adds :chksm (the result of check-sum on :nums) to data, unless :ill
  is truthy, in which case data is returned untouched."
  [{:keys [ill nums] :as data}]
  (cond-> data
    (not ill) (assoc :chksm (check-sum nums))))
(defn tag-illegible
  "Adds :ill to data: true when any entry of :nums is nil, otherwise nil."
  [{:keys [nums] :as data}]
  (let [unreadable? (some nil? nums)]
    (assoc data :ill unreadable?)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Error correction logic and tagging
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defn flip-bit
  "Returns bin with the cell at idx inverted: a positive cell becomes 0,
  anything else becomes 1. bin must support assoc by index (a vector)."
  [bin idx]
  (let [current (nth bin idx)
        flipped (if (pos? current) 0 1)]
    (assoc bin idx flipped)))
(defn chg-perms
  "Returns a lazy sequence of every variant of bin that differs from it
  in exactly one cell, one variant per index, in index order. Each
  variant is a vector."
  [bin]
  (let [cells (vec bin)]
    (for [i (range (count bin))]
      (flip-bit cells i))))
(defn get-psbl-acct-nums
  "Returns the candidate digit sequences reachable by flipping exactly
  one cell of the scanned entry. A candidate is kept only when every
  glyph is found in the numbers table and the digits pass check-sum."
  [{:keys [bin]}]
  (->> (chg-perms (apply concat bin))           ; flatten glyphs, flip one cell at a time
       (map (fn [cells]
              (map numbers (partition 9 cells)))) ; regroup into glyphs and look each one up
       (remove (fn [digits] (some nil? digits)))  ; drop candidates with an unknown glyph
       (filter check-sum)))
(defn tag-err-options
  "Adds :opts to data: the alternative digit sequences produced by
  get-psbl-acct-nums for this entry."
  [data]
  (->> data
       get-psbl-acct-nums
       (assoc data :opts)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; formatting output
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defn format-amb-options
  "Renders opts (a collection of digit sequences) as the ambiguity
  suffix: a leading space, AMB, and a bracketed list in which each
  option is single-quoted and options are separated by a comma and a
  space."
  [opts]
  (let [rendered (map (partial apply str) opts)
        joined   (apply str (interpose "', '" rendered))]
    (str " AMB ['" joined "']")))
(defn format-output
  "Renders one tagged entry as its output line.

  data keys read:
    :nums  - digits, with nil for each unreadable glyph
    :ill   - truthy when some glyph was unreadable
    :chksm - truthy when the digits passed the checksum
    :opts  - alternative digit sequences found by error correction

  Result, in order of precedence:
    exactly one alternative  -> that alternative's digits
    several alternatives     -> the scanned digits plus the AMB list
    unreadable glyphs        -> the scanned digits plus ILL
    checksum failure         -> the scanned digits plus ERR
    otherwise                -> the scanned digits
  Unreadable glyphs are printed as ?."
  [{:keys [nums ill chksm opts]}]
  (let [cnt-opts (count opts)
        num-str  (apply str (map #(if % % \?) nums))]
    (cond
      (= 1 cnt-opts)  (apply str (first opts))
      (pos? cnt-opts) (str num-str (format-amb-options opts))
      ;; Illegible digits and a failed checksum are different statuses;
      ;; report each with its own marker instead of ILL for both.
      ill             (str num-str " ILL")
      (not chksm)     (str num-str " ERR")
      :else           num-str)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; core parsing logic
;; the input is threaded through a series
;; of transform and analysis steps before
;; being formatted for output
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defn convert-lines
  "Converts the text lines of one entry into its formatted output line.
  Only the first three lines are read. Each is split into 3-cell groups,
  and the groups at the same position are concatenated across the lines
  to form one glyph. The glyphs are then tagged with digits,
  illegibility, checksum and correction options before formatting."
  [lines]
  (let [rows   (map line-to-bin (take 3 lines))
        glyphs (apply map concat rows)]
    (-> {:bin glyphs}
        tag-numbers
        tag-illegible
        tag-check-sum
        tag-err-options
        format-output)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; entry points
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defn load-ocr-file
  "Reads the classpath resource named by file and returns a realized
  sequence with one formatted output line per entry.

  Lines are taken four at a time; convert-lines reads the first three
  of each group. The final group is also accepted when it has only
  three lines, so the last entry is not lost when the file does not end
  with the blank separator line. Shorter trailing groups are skipped.

  The result is forced with doall before the reader is closed."
  [file]
  (with-open [r (reader (resource file))]
    (->> (line-seq r)
         ;; partition-all keeps a short final group, which partition
         ;; would silently drop.
         (partition-all 4)
         (filter #(<= 3 (count %)))
         (map convert-lines)
         (doall))))
(defn convert-file
  "Converts the OCR resource in-file and writes one result per line to
  out-file. The input is fully converted before the output is opened.
  Returns nil."
  [in-file out-file]
  (let [results (load-ocr-file in-file)]
    (with-open [out (writer out-file)]
      (doseq [result results]
        (.write out (str result "\n"))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment