Skip to content

Instantly share code, notes, and snippets.

@malcolmsparks
Last active May 28, 2019 14:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save malcolmsparks/16e5618ebb759f15f7fc938eacd5c9f4 to your computer and use it in GitHub Desktop.
Save malcolmsparks/16e5618ebb759f15f7fc938eacd5c9f4 to your computer and use it in GitHub Desktop.
JSON path parsing in Clojure
(ns juxt.jsonschema.formula
(:require [clojure.set :as set])
)
(defn partition-into-ranges-iter
  "Group the numbers (or characters) in coll into runs of consecutive
  values. Returns a vector of vectors, one per run, in ascending order,
  e.g. [1 2 3 5 7 8] => [[1 2 3] [5] [7 8]].

  The input is sorted first, so the overall cost is O(n log n); the
  grouping pass itself is linear. Duplicate values are collapsed so that
  a repeated value cannot split a run in two."
  [coll]
  (loop [[x & xs] (distinct (sort coll))
         prev nil ; integer value of the previously visited element, nil before the first
         curr []
         ranges []]
    (cond
      ;; Input exhausted: flush the run in progress, if there is one.
      (nil? x)
      (cond-> ranges (seq curr) (conj curr))

      ;; x directly follows the previous value: extend the current run.
      (and prev (= (inc prev) (int x)))
      (recur xs (int x) (conj curr x) ranges)

      ;; First element, or a gap: close the current run and start a new one.
      :else
      (recur xs (int x) [x] (cond-> ranges (seq curr) (conj curr))))))
(defprotocol RegExpressable
  "Values that can be rendered as a fragment of Java regex source."
  (as-regex-str [this]
    "Return a string that represents the Java regex"))
(defn int-range
  "Sequence of integers from n1 through n2, both ends included. Each
  bound is coerced with `int`, so characters are accepted as well as
  numbers."
  [n1 n2]
  ;; `range` excludes its upper bound, hence the `inc` on n2.
  (->> n2
       int
       inc
       (range (int n1))))
;; Lookup table from a code point to the text used for it in a Java
;; regex. It holds two kinds of entry: characters with a dedicated
;; backslash escape, and the ASCII letters and digits, which stand for
;; themselves.
(def regex-chars
  (let [escapes {(int \\)     "\\\\"
                 (int \u0009) "\\t"
                 (int \u000A) "\\n"
                 (int \u000D) "\\r"
                 (int \u000C) "\\f"
                 (int \u0007) "\\a"
                 (int \u001B) "\\e"}
        alnum   (concat (int-range \A \Z)
                        (int-range \a \z)
                        (int-range \0 \9))]
    ;; Letters and digits map to their own one-character string.
    (merge escapes
           (zipmap alnum (map (comp str char) alnum)))))
(defn int->regex
  "Render the code point n as Java regex text.

  Below 256 the entry in `regex-chars` is used when there is one,
  otherwise a two-digit \\xHH escape. Below 65536 a \\uHHHH escape is
  produced, and anything larger becomes \\x{H...}."
  [n]
  (condp > n
    256   (or (regex-chars n) (format "\\x%02X" n))
    65536 (format "\\u%04X" n)
    (format "\\x{%04X}" n)))
(defn expand-with-character-classes
  "Turn a collection of code points into Java regex text that matches
  exactly those code points.

  Each named character class in the table below is tried in order; when
  all of its members are present, they are replaced by \\p{Name}. The
  code points left over are emitted as ranges (for consecutive runs) or
  as single escaped characters. When the result has more than one part,
  the parts are wrapped in a surrounding [...].

  This function is not designed for performance and should be called to
  prepare systems prior to the handling of HTTP requests."
  [s]
  (let [;; Order matters: a class is only used if all of its members are
        ;; still unclaimed by the classes tried before it.
        named-classes
        [["Alnum"  (set (concat (int-range \A \Z) (int-range \a \z) (int-range \0 \9)))]
         ["Alpha"  (set (concat (int-range \A \Z) (int-range \a \z)))]
         ["XDigit" (set (concat (int-range \0 \9) (int-range \A \F) (int-range \a \f)))]
         ["Digit"  (set (int-range \0 \9))]
         ["Cntrl"  (set (concat (int-range \u0000 \u001f) [(int \u007f)]))]
         ["Punct"  (set (map int [\! \" \# \$ \% \& \' \(
                                  \) \* \+ \, \- \. \/ \:
                                  \; \< \= \> \? \@ \[ \\
                                  \] \^ \_ \` \{ \| \} \~]))]
         ["Blank"  (set (map int [\space \tab]))]]

        ;; Claim a class when the unclaimed code points cover it fully.
        claim (fn [[names unclaimed :as state] [class-name members]]
                (if (set/subset? members unclaimed)
                  [(conj names class-name) (set/difference unclaimed members)]
                  state))

        [class-names leftover] (reduce claim [[] (set s)] named-classes)

        class-parts (for [class-name class-names]
                      (format "\\p{%s}" class-name))

        ;; Consecutive runs become [lo-hi]; isolated code points stand alone.
        range-parts (for [run (partition-into-ranges-iter leftover)
                          :let [lo (int->regex (first run))]]
                      (if (next run)
                        (format "[%s-%s]" lo (int->regex (last run)))
                        lo))

        parts  (concat class-parts range-parts)
        joined (apply str parts)]
    ;; Several parts form a union, so wrap them in one bracket expression.
    (if (next parts)
      (format "[%s]" joined)
      joined)))
;; How each supported type is rendered as Java regex text.
(extend-protocol RegExpressable
  ;; Sequences and vectors are treated as sets of characters / code
  ;; points and collapsed into character classes and ranges.
  clojure.lang.ISeq
  (as-regex-str [s]
    (expand-with-character-classes (map int s)))

  clojure.lang.PersistentVector
  (as-regex-str [s]
    (expand-with-character-classes (map int s)))

  ;; Strings are taken to be regex source already and pass through as is.
  String
  (as-regex-str [s] s)

  ;; A single character or code point becomes one escaped regex character.
  Character
  (as-regex-str [c]
    (int->regex (int c)))

  Integer
  (as-regex-str [n]
    (int->regex n))

  Long
  (as-regex-str [n]
    (assert (<= n Integer/MAX_VALUE))
    (int->regex (int n)))

  ;; A compiled pattern contributes its own source text.
  java.util.regex.Pattern
  (as-regex-str [re]
    (str re))

  clojure.lang.PersistentHashSet
  (as-regex-str [s]
    ;; Expand the set directly instead of dispatching on (seq s): seq of
    ;; an empty set is nil, and nil has no RegExpressable implementation,
    ;; so the indirect form threw for #{}.
    (expand-with-character-classes (map int s))))
(defn compose
  "Build a compiled regex from the format string fmt. Every argument is
  first converted to regex text with `as-regex-str`, then substituted
  into fmt with `format`, and the result is compiled with `re-pattern`."
  [fmt & args]
  (->> args
       (map as-regex-str)
       (apply format fmt)
       re-pattern))
;; Code points of the ASCII letters A-Z followed by a-z.
(def ALPHA (concat (int-range \A \Z) (int-range \a \z)))
;; Code points of the ASCII digits 0-9.
(def DIGIT (int-range \0 \9))
;; The period character, used as the separator in dotted path segments.
(def PERIOD \.)
;; Code points that may appear in a reference token without escaping:
;; 0x00 through 0xFFFF, leaving out 0x2F ("/") and 0x7E ("~").
(def unescaped (concat (int-range 0x00 0x2E)
(int-range 0x30 0x7D)
;; Should be this:
#_(int-range 0x7F 0x10FFFF)
;; but too slow, so do this instead for now:
(int-range 0x7F 0xFFFF)))
;; Zero or more items, each an unescaped code point or one of the
;; two-character escapes ~0 and ~1.
(def referenced-token (compose "(?:[%s]|~0|~1)*" unescaped))
;; Zero or more reference tokens, each preceded by "/".
(def json-pointer (compose "(?:/%s)*" referenced-token))
;; draft-handrews-relative-json-pointer-01
;; Either a lone "0", or a digit 1-9 followed by any number of digits
;; 0-9 (so no leading zeros).
(def non-negative-integer (compose "(?:%s|%s%s*)" \0 (int-range \1 \9) (int-range \0 \9)))
;; A non-negative integer followed by either "#" or a JSON pointer.
(def relative-json-pointer (compose "%s(?:#|%s)" non-negative-integer json-pointer))
;; Relative path: "@" followed by anything. The tail is not validated.
(def relative-json-path #"@.*")
;; A letter followed by any number of letters or digits.
(def json-identifier (compose "%s%s*" ALPHA (concat ALPHA DIGIT)))
;; A period followed by an identifier, e.g. ".name".
(def dotted-json-segment (compose "%s%s" PERIOD json-identifier))
;; Content of a bracketed segment: one or more digits, or an identifier
;; in single quotes.
(def json-array-element (compose "(?:%s+|%s%s%s)" DIGIT \' json-identifier \'))
;; A bracketed segment: "[" element "]".
(def array-json-segment (compose "%s%s%s" \[ json-array-element \]))
;; Any single segment, dotted or bracketed.
(def json-segment (compose "(?:%s|%s)" dotted-json-segment array-json-segment ))
;; Absolute path: "$" followed by zero or more segments.
(def absolute-json-path (compose "%s(?:%s)*" \$ json-segment))
;; Either an absolute or a relative path.
(def json-path (compose "(?:%s|%s)" absolute-json-path relative-json-path))
;; An optional leading "-" followed by one or more digits.
(def formula-numeric (compose "%s?%s+" \- DIGIT))
;; An operand is an absolute JSON path or a numeric literal.
;; NOTE(review): relative paths (json-path) are not accepted here — confirm
;; that this is intended.
(def formula-operand (compose "(?:%s|%s)" absolute-json-path formula-numeric))
;; Exactly one of the operator characters + - * /
(def formula-operator (compose "[%s]" [\+ \- \* \/]))
;; operand operator operand, each in its own capturing group.
(def formula-expr (compose "(%s)(%s)(%s)" formula-operand formula-operator formula-operand))
;; A relative JSON pointer (named group "path") followed by "{", an
;; expression (named group "expr") and "}". The three capturing groups of
;; formula-expr follow the two named groups in the match result.
(def formula (compose "(?<path>%s)%s(?<expr>%s)%s" relative-json-pointer \{ formula-expr \}))
;; REPL-style example, evaluated when the file is loaded and its result
;; discarded. Dropping the first three entries of the match vector (whole
;; match, path, expr) leaves the two operands and the operator; this is
;; expected to give ("-1" "/" "$.Jaguar") — TODO confirm at a REPL.
(drop 3 (re-matches formula "1/{-1/$.Jaguar}"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment