Skip to content

Instantly share code, notes, and snippets.

@gardnervickers
Last active January 30, 2016 22:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gardnervickers/84a5194dac835fdc7911 to your computer and use it in GitHub Desktop.
Save gardnervickers/84a5194dac835fdc7911 to your computer and use it in GitHub Desktop.
(ns fuzzy-schema.core-test
(:require [schema
[core :as s]
[utils :as utils]]
[schema.spec.core :as spec]))
;; Example schema used to exercise the fuzzy key matcher.
;; Requires a nested :zookeeper/configuration map (string address, numeric
;; port) and a string :onyx/onyx-id; :w is an optional numeric key.
(def sample-schema
{:zookeeper/configuration {:zookeeper/address s/Str
:zookeeper/port s/Num
}
:onyx/onyx-id s/Str
(s/optional-key :w) s/Num})
(defn levenshtein
  "Returns the Levenshtein edit distance between `str1` and `str2`: the
  minimum number of single-element insertions, deletions and substitutions
  needed to turn one into the other.

  Both arguments may be strings or any counted, sequential collection.
  When either argument is empty the distance is the length of the other."
  [str1 str2]
  (let [n (count str1)
        m (count str2)]
    (cond
      (zero? n) m
      (zero? m) n
      :else
      (let [s2 (vec str2)]
        ;; Fold over the elements of str1, carrying only the previous row of
        ;; the DP table. Each new row is built in a transient whose *returned*
        ;; value is threaded through the inner reduce (transients must not be
        ;; bashed in place), then frozen before it becomes the next prev-row.
        (peek
          (reduce
            (fn [prev-row [i c1]]
              (persistent!
                (reduce
                  (fn [row j]
                    (conj! row
                           (min (inc (nth row j))               ; insertion
                                (inc (nth prev-row (inc j)))    ; deletion
                                (+ (nth prev-row j)             ; substitution / match
                                   (if (= c1 (nth s2 j)) 0 1)))))
                  ;; first cell of row i+1: distance from a prefix of str1 to ""
                  (transient [(inc i)])
                  (range m))))
            ;; row 0: distance from "" to each prefix of str2
            (vec (range (inc m)))
            (map-indexed vector str1)))))))
(defn get-distances
  "Pairs every element of `possible-keys` with its Levenshtein distance from
  `k`. Both sides are compared by their `str` representation. Returns a
  vector of `[distance candidate]` pairs in the order of `possible-keys`."
  [k possible-keys]
  (let [target (str k)]
    (into []
          (map (fn [candidate]
                 [(levenshtein target (str candidate)) candidate]))
          possible-keys)))
;; Example input for sample-schema. Note the nested key :zookeeper/ports,
;; which does not appear in sample-schema (the schema key is :zookeeper/port).
(def sample-data {:zookeeper/configuration {:zookeeper/address "100"
:zookeeper/ports 99}
:onyx/onyx-id "HI"})
(defn fuzzy-match-walk
  "Builds a checker function for `schema` using `spec/run-checker`. The
  returned function takes data and, at every level of the schema, runs the
  regular schema checker. For each key of the data reported as
  `disallowed-key` it prints the offending key together with the keys of the
  schema at that level, ordered from closest to farthest by Levenshtein
  distance.

  Returns `data` for a level that validates, and the checker's own error
  result for a level that does not, so enclosing levels still see the failure."
  [schema]
  (spec/run-checker
    (fn [s params]
      (let [checker (spec/checker (s/spec s) params)
            ;; Only plain map schemas have explicit keys to suggest. Unwrap
            ;; required/optional key wrappers so candidates are compared (and
            ;; printed) as the bare keys a user would actually type.
            known-keys (when (and (map? s) (not (record? s)))
                         (->> (keys s)
                              (filter s/specific-key?)
                              (map s/explicit-schema-key)))]
        (fn [data]
          (let [result (checker data)
                err (utils/error-val result)]
            (if err
              (do
                ;; Leaf schemas report a non-map error value; only map-shaped
                ;; errors carry per-key problems worth inspecting.
                (when (map? err)
                  (doseq [[k v] err
                          :when (= v 'disallowed-key)]
                    (println k " Is an invalid key."
                             "Did you mean: "
                             (mapv second (sort-by first
                                                   (get-distances k known-keys))))))
                result)
              data)))))
    true
    schema))
;((fuzzy-match-walk sample-schema) sample-data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment