Skip to content

Instantly share code, notes, and snippets.

@aroemers
Last active October 4, 2022 13:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aroemers/238a2499d6d4743374e1ff861fb475c5 to your computer and use it in GitHub Desktop.
Save aroemers/238a2499d6d4743374e1ff861fb475c5 to your computer and use it in GitHub Desktop.
Crustimoney 2
;;;; Implementation based on
;;;; https://blog.bruce-hill.com/packrat-parsing-from-scratch
(ns crustimoney2.core
(:require [clojure.string :as str]))
;;; Utils
(defn success? [o]
(and (map? o) o))
;;; Base parsing primitives
(defn literal [s]
(fn [text i]
(let [end (+ i (count s))]
(if (and (<= end (count text)) (= (subs text i end) s))
{:start i :end end}
(list {:error :expected-literal :at i :literal s})))))
(defn chain [& parsers]
(fn [text i]
(loop [parsers parsers
start i
children []]
(if-let [parser (first parsers)]
(let [result (parser text start)]
(if (success? result)
(recur (next parsers) (:end result) (conj children result))
result))
{:start i :end start :children children}))))
(defn one-of [& parsers]
(fn [text i]
(loop [parsers parsers
errors ()]
(if-let [parser (first parsers)]
(let [result (parser text i)]
(if (success? result)
{:start i :end (:end result) :children [result]}
(recur (next parsers) (concat errors result))))
errors))))
(defn repeat* [parser]
(fn [text i]
(loop [start i
children []]
(if-let [result (success? (parser text start))]
(recur (:end result) (conj children result))
{:start i :end start :children children}))))
(defn negate [parser]
(fn [text i]
(when-not (success? (parser text i))
{:start i :end i})))
(defn with-name [key parser]
(fn [text i]
(let [result (parser text i)]
(if (success? result)
(assoc result :name key)
result))))
;;; Extra combinators
(defn repeat+ [parser]
(let [parser* (repeat* parser)]
(fn [text i]
(let [head (parser text i)]
(if (success? head)
(let [more (parser* text (:end head))]
{:start i
:end (:end more)
:children (cons head (:children more))})
head)))))
(defn regex [re]
(let [pattern (re-pattern (str "^(" re ")"))]
(fn [text i]
(if-let [[match] (re-find pattern (subs text i))]
{:start i :end (+ i (count match))}
(list {:error :expected-match :at i :regex re})))))
(defn lookahead [parser]
(fn [text i]
(when (success? (parser text i))
{:start i :end i})))
(defn maybe [parser]
(fn [text i]
(or (success? (parser text i))
{:start i :end i})))
(defn with-error [key parser]
(fn [text i]
(let [result (parser text i)]
(or (success? result)
(list {:error key :at i})))))
(defn eof [parser]
(fn [text i]
(let [result (parser text i)]
(if (success? result)
(if (= (count text) (:end result))
result
(list {:error :eof-not-reached :at (:end result)}))
result))))
;;; Grammar maps
(defn- ref-fn [parsers]
(fn [key]
(fn [text i]
(if-let [parser (get @parsers key)]
(parser text i)
(throw (ex-info "Reference to unknown parser" {:key key}))))))
(defmacro rmap [parsers]
`(let [parsers# (atom nil)
~'ref (#'ref-fn parsers#)]
(reset! parsers# ~parsers)))
;;; Result parsing
(defn keep-named [result]
(if (:name result)
(list (if-let [children (:children result)]
(assoc result :children (mapcat keep-named children))
result))
(when-let [children (:children result)]
(mapcat keep-named children))))
;;; Packrat caching
(def ^:dynamic *cache* (fn [parser text i] (parser text i)))
(defmacro with-cache [cache & body]
`(binding [*cache* ~cache]
~@body))
(defn caching [parser]
(fn [text i]
(*cache* parser text i)))
;; Example atom cache with invalidation
(defn auto-invalidating-atom-cache [a]
(fn [parser text i]
(when-let [{:keys [result hashed]} (get-in (deref a) [parser i])]
(when-not (= hashed (hash (subs text (:start result) (:end result))))
(let [cache (swap! a update parser dissoc i)]
(when-not (seq (get cache parser))
(swap! a dissoc parser)))))
(or (get-in (deref a) [parser i :result])
(let [result (parser text i)]
(when (success? result)
(let [hashed (hash (subs text (:start result) (:end result)))]
(swap! a update parser assoc i {:result result :hashed hashed })))
result))))
;;; Example test
(comment
(def grammar
(rmap {:expr (ref :sum)
:sum (one-of (with-name :sum
(chain (ref :product)
(ref :sum-op)
(ref :sum)))
(ref :product))
:product (one-of (with-name :product
(chain (ref :value)
(ref :product-op)
(ref :product)))
(ref :value))
:value (one-of (ref :number)
(chain (literal "(")
(ref :expr)
(literal ")")))
:sum-op (regex "[+-]")
:product-op (regex "[*/]")
:number (with-name :number
(with-error :expected-number
(regex "[0-9]+")))}))
(def parser (eof (:expr grammar)))
(defn parse [s]
(let [result (parser s 0)]
(if (success? result)
(keep-named result)
(distinct result))))
(def cache (atom nil))
(defn parse-with-cache [s]
(binding [*cache* (auto-invalidating-atom-cache cache)]
(parse s)))
)
(ns crustimoney2.non-recursive)
;;; Utils
(defn success? [result]
(when (map? result)
result))
(defn errors? [result]
(when (list? result)
result))
(defn push? [result]
(when (vector? result)
result))
;;; Primitives
(defn literal [s]
(fn [text index]
(let [end (+ index (count s))]
(if (and (<= end (count text)) (= (subs text index end) s))
{:start index :end end}
(list {:error :expected-literal :at index :literal s})))))
(defn chain [& parsers]
(fn
([text index]
(if-let [parser (first parsers)]
[parser index {:pindex 0 :children []}]
{:start index :end index :children []}))
([text index result state]
(if (success? result)
(let [state (-> state (update :pindex inc) (update :children conj result))]
(if-let [parser (nth parsers (:pindex state) nil)]
[parser (:end result) state]
{:start (-> state :children first :start)
:end (-> state :children last :end)
:children (:children state)}))
result))))
(defn choice [& parsers]
(fn
([text index]
(if-let [parser (first parsers)]
[parser index {:pindex 0 :children [] :errors ()}]
{:start index :end index :children []}))
([text index result state]
(if (success? result)
{:start (:start result)
:end (:end result)
:children [result]}
(let [state (-> state (update :pindex inc) (update :errors into result))]
(if-let [parser (nth parsers (:pindex state) nil)]
[parser index state]
(:errors state)))))))
(defn repeat* [parser]
(fn
([text index]
[parser index {:children []}])
([text index result state]
(if (success? result)
(let [state (update state :children conj result)]
[parser (:end result) state])
{:start index
:end (-> state :children last (:end index))
:children (:children state)}))))
(defn negate [parser]
(fn
([text index]
[parser index nil])
([text index result state]
(when-not (success? result)
{:start index :end index}))))
(defn with-name [key parser]
(fn [& args]
(let [result (apply parser args)]
(cond-> result
(success? result) (assoc :name key)))))
;;; Node functions
(defn text-value [text result]
(subs text (:start result) (:end result)))
(defn flattened [result]
(let [children (mapcat (fn [child]
(if (:name child)
[child]
(filter :name (:children child))))
(:children result))]
(if (seq children)
(assoc result :children children)
(dissoc result :children))))
;;; Parsing virtual machine
(defn parse
([parser text]
(parse parser nil text))
([parser opts text]
(let [cache (:cache opts (constantly nil))
post-success (:post-success opts flattened)]
(loop [stack [[parser 0 nil]]
result nil
state nil]
(if-let [[parser index state'] (peek stack)]
(let [result (or (if result
(parser text index result state)
(parser text index))
())]
(cond (push? result)
(let [[push-parser push-index push-state] result]
(if-let [hit (cache text push-parser push-index)]
(recur stack hit push-state)
(recur (conj stack result) nil nil)))
(success? result)
(let [processed (post-success result)]
(cache text parser index processed)
(recur (pop stack) processed state'))
(errors? result)
(recur (pop stack) result state')
:otherwise
(throw (ex-info "unexpected result from parser"
{:parser parser, :type (type result)}))))
result)))))
;;; Extra combinators
(defn regex [re]
(let [pattern (re-pattern (str "^(" re ")"))]
(fn [text index]
(if-let [[match] (re-find pattern (subs text index))]
{:start index :end (+ index (count match))}
(list {:error :expected-match :at index :regex re})))))
(defn repeat+ [parser]
(fn
([text index]
[parser index {:children []}])
([text index result state]
(if (success? result)
[parser (:end result) (update state :children conj result)]
(if-let [children (seq (:children state))]
{:start index
:end (-> children last :end)
:children children}
result)))))
(defn lookahead [parser]
(fn
([text index]
[parser index nil])
([text index result state]
(when (success? result)
{:start index :end index}))))
(defn maybe [parser]
(fn
([text index]
[parser index nil])
([text index result state]
(or (success? result)
{:start index :end index}))))
(defn eof [parser]
(fn
([text index]
[parser index nil])
([text index result state]
(if (success? result)
(if (= (:end result) (count text))
result
(list {:error :eof-not-reached :at (:end result)}))
result))))
(defn finding [parser]
(let [grammar (rmap {:finding (choice parser (chain (regex #".") (ref :finding)))})]
(:finding grammar)))
(defn with-error [key parser]
(fn [text index & args]
(let [result (apply parser text index args)]
(if (errors? result)
(list {:error key :at index})
result))))
(defn with-value
([parser]
(with-value identity parser))
([f parser]
(fn [text & args]
(let [result (apply parser text args)]
(cond-> result
(success? result)
(assoc :value (f (text-value text result))))))))
;;; Packrat caches
(defn atom-cache []
(let [a (atom nil)]
(fn
([text parser index]
(get-in @a [parser index]))
([text parser index result]
(swap! a assoc-in [parser index] result)
result))))
;;; Gramma definition
(defn- ref-fn [parsers]
(fn [key]
(fn [& args]
(if-let [parser (get @parsers key)]
(apply parser args)
(throw (ex-info "Reference to unknown parser" {:key key}))))))
(defmacro rmap [parsers]
`(let [parsers# (atom nil)
~'ref (#'ref-fn parsers#)]
(reset! parsers# ~parsers)))
;;; Examples
(comment
(def resolve-sym (comp resolve symbol))
(def grammar
(rmap {:expr (ref :sum)
:sum (choice (with-name :calc
(chain (ref :product)
(ref :sum-op)
(ref :sum)))
(ref :product))
:product (choice (with-name :calc
(chain (ref :value)
(ref :product-op)
(ref :product)))
(ref :value))
:value (choice (ref :number)
(chain (literal "(")
(ref :expr)
(literal ")")))
:sum-op (with-value resolve-sym
(with-name :op
(regex "[+-]")))
:product-op (with-value resolve-sym
(with-name :op
(regex "[*/]")))
:number (with-value parse-long
(with-name :number
(with-error :expected-number
(regex "[0-9]+"))))}))
(defn calc [result]
(case (:name result)
:number (:value result)
:calc (let [[x {op :value} y] (:children result)]
(op (calc x) (calc y)))))
(defn calculate-string [s]
(-> grammar :expr finding (parse s) :children first calc))
(calculate-string "the calculatino (1+2)*3 should be 9"))
(ns crustimoney2.non-recursive)
;;; Result constructors and predicates
(defn ->success
"Create a success result, given a start index (inclusive) and end
index (exclusive). Optionally a collection of success children can
be given. The name of the success is nil."
([start end]
[nil {:start start :end end}])
([start end children]
(into (->success start end) children)))
(defn success?
"Returns obj if obj is a success value, nil otherwise."
[obj]
(when (vector? obj)
obj))
(defn success->start
"Return the start index of a success."
[success]
(-> success second :start))
(defn success->end
"Return the end index of a success."
[success]
(-> success second :end))
(defn with-success-children
"Set the children of a success."
[success children]
(let [[name attrs] success]
(into [name attrs] children)))
(defn success->children
"Returns the children of a success."
[success]
(drop 2 success))
(defn with-success-name
"Set the name of the success value."
[key success]
(vec (cons key (rest success))))
(defn success->name
"Return the name of a success."
[success]
(first success))
(defn with-success-attrs
"Add extra success attributes to the given success."
[success attrs]
(update success 1 merge attrs))
(defn success->attrs
"Return the attributes of a success."
[success]
(dissoc (second success) :start :end))
(defn success->attr
"Returns an attribute value of a success."
[success attr]
(get (second success) attr))
(defn ->error
"Create an error result, given an error key and an index. An extra
detail object can be added."
([key index]
{:key key :at index})
([key index detail]
{:key key :at index :detail detail}))
(defn error->key
"Return the key of an error."
[error]
(error :key))
(defn error->index
"Return the index of an error"
[error]
(error :at))
(defn error->detail
"Return the detail object of an error."
[error]
(error :detail))
(defn ->push
"Create a push value, given a parser function and an index. Optionally
a state object can be added."
([parser index]
(->push parser index nil))
([parser index state]
{:parser parser :index index :state state}))
(defn push?
"Returns obj if obj is a push value."
[obj]
(when (map? obj)
obj))
(defn push->parser
"Returns the parser of a push value."
[push]
(push :parser))
(defn push->index
"Returns the index of a push value."
[push]
(push :index))
(defn push->state
"Returns the state of a push value."
[push]
(push :state))
;;; Primitives
(defn literal [s]
(fn [text index]
(let [end (+ index (count s))]
(if (and (<= end (count text)) (= (subs text index end) s))
(->success index end)
(list (->error :expected-literal index {:literal s}))))))
(defn chain [& parsers]
(fn
([text index]
(if-let [parser (first parsers)]
(->push parser index {:pindex 0 :children []})
(->success index index)))
([text index result state]
(if (success? result)
(let [state (-> state (update :pindex inc) (update :children conj result))]
(if-let [parser (nth parsers (:pindex state) nil)]
(->push parser (success->end result) state)
(->success (-> state :children first success->start)
(-> state :children last success->end)
(:children state))))
result))))
(defn choice [& parsers]
(fn
([text index]
(if-let [parser (first parsers)]
(->push parser index {:pindex 0 :children [] :errors ()})
(->success index index)))
([text index result state]
(if (success? result)
(->success (success->start result) (success->end result) [result])
(let [state (-> state (update :pindex inc) (update :errors into result))]
(if-let [parser (nth parsers (:pindex state) nil)]
(->push parser index state)
(:errors state)))))))
(defn repeat* [parser]
(fn
([text index]
(->push parser index {:children []}))
([text index result state]
(if (success? result)
(let [state (update state :children conj result)]
(->push parser (:end result) state))
(let [end (or (some-> state :children last success->end) index)]
(->success index end (:children state)))))))
(defn negate [parser]
(fn
([text index]
(->push parser index))
([text index result state]
(when-not (success? result)
(->success index index)))))
(defn with-name [key parser]
(fn [& args]
(let [result (apply parser args)]
(cond->> result
(success? result) (with-success-name key)))))
;;; Node functions
(defn text-value [text result]
(subs text (success->start result) (success->end result)))
(defn flattened [result]
(let [children (mapcat (fn [child]
(if (success->name child)
[child]
(filter success->name (success->children child))))
(success->children result))]
(with-success-children result children)))
;;; Parsing virtual machine
(defn parse
([parser text]
(parse parser nil text))
([parser opts text]
(let [start-index (:index opts 0)
cache (:cache opts (constantly nil))
post-success (:post-success opts flattened)]
(loop [stack [(->push parser start-index)]
result nil
state nil]
(if-let [stack-item (peek stack)]
(let [parser (push->parser stack-item)
index (push->index stack-item)
state' (push->state stack-item)
result (or (if result
(parser text index result state)
(parser text index))
())]
(cond (push? result)
(let [push-parser (push->parser result)
push-index (push->index result)
push-state (push->state result)]
(if-let [hit (cache text push-parser push-index)]
(recur stack hit push-state)
(recur (conj stack result) nil nil)))
(success? result)
(let [processed (post-success result)]
(cache text parser index processed)
(recur (pop stack) processed state'))
(list? result)
(recur (pop stack) result state')
:otherwise
(let [info {:parser parser, :type (type result)}]
(throw (ex-info "Unexpected result from parser" info)))))
result)))))
;;; Grammar definition
(defn- ref-fn [parsers]
(fn [key]
(fn [& args]
(if-let [parser (get @parsers key)]
(apply parser args)
(throw (ex-info "Reference to unknown parser" {:key key}))))))
(defmacro rmap [parsers]
`(let [parsers# (atom nil)
~'ref (#'ref-fn parsers#)]
(reset! parsers# ~parsers)))
;;; Extra combinators
(defn regex [re]
(let [pattern (re-pattern (str "^(" re ")"))]
(fn [text index]
(if-let [[match] (re-find pattern (subs text index))]
(->success index (+ index (count match)))
(list (->error :expected-match index {:regex re}))))))
(defn repeat+ [parser]
(fn
([text index]
(->push parser index {:children []}))
([text index result state]
(if (success? result)
(->push parser (success->end result) (update state :children conj result))
(if-let [children (seq (:children state))]
(->success index (-> children last success->end) children)
result)))))
(defn lookahead [parser]
(fn
([text index]
(->push parser index))
([text index result state]
(when (success? result)
(->success index index)))))
(defn maybe [parser]
(fn
([text index]
(->push parser index))
([text index result state]
(or (success? result)
(->success index index)))))
(defn eof [parser]
(fn
([text index]
(->push parser index))
([text index result state]
(if (success? result)
(if (= (success->end result) (count text))
result
(list (->error :eof-not-reached (success->end result))))
result))))
(defn finding [parser]
(let [grammar (rmap {:finding (choice parser (chain (regex #".") (ref :finding)))})]
(:finding grammar)))
(defn with-error [key parser]
(fn [text index & args]
(let [result (apply parser text index args)]
(if (list? result)
(list (->error key index))
result))))
(defn with-value
([parser]
(with-value identity parser))
([f parser]
(fn [text & args]
(let [result (apply parser text args)]
(cond-> result
(success? result)
(with-success-attrs {:value (f (text-value text result))}))))))
;;; Packrat caches
(defn atom-cache []
(let [a (atom nil)]
(fn
([text parser index]
(get-in @a [parser index]))
([text parser index result]
(swap! a assoc-in [parser index] result)
result))))
;;; Examples
(comment
(def resolve-sym (comp resolve symbol))
(def grammar
(rmap {:expr (ref :sum)
:sum (choice (with-name :calc
(chain (ref :product)
(ref :sum-op)
(ref :sum)))
(ref :product))
:product (choice (with-name :calc
(chain (ref :value)
(ref :product-op)
(ref :product)))
(ref :value))
:value (choice (ref :number)
(chain (literal "(")
(ref :expr)
(literal ")")))
:sum-op (with-value resolve-sym
(with-name :op
(regex "[+-]")))
:product-op (with-value resolve-sym
(with-name :op
(regex "[*/]")))
:number (with-value parse-long
(with-name :number
(with-error :expected-number
(regex "[0-9]+"))))}))
(defn calc [result]
(case (success->name result)
:number (success->attr result :value)
:calc (let [[x op y] (success->children result)
f (success->attr op :value)]
(f (calc x) (calc y)))))
(defn calculate-string [s]
(-> grammar :expr finding (parse s) success->children first calc))
(calculate-string "the calculatino (1+2)*3 should be 9")
(def grammar
(rmap {:start (choice (ref :rules)
(ref :choice))
:rules (choice (chain (ref :rule)
(maybe (ref :space))
(regex #"\n")
(ref :rules))
(ref :rule))
:rule (with-name :rule
(chain (maybe (ref :space))
(ref :non-terminal)
(maybe (ref :space))
(literal "<-")
(maybe (ref :space))
(ref :choice)))
:choice (with-name :choices
(repeat+ (choice (chain (maybe (ref :space))
(literal "/")
(maybe (ref :space))
(ref :chain))
(ref :chain))))
:chain (with-name :chain
(repeat+ (choice (chain (ref :space)
(ref :item))
(ref :item))))
:item (choice (ref :literal)
(ref :character-class)
(ref :non-terminal)
(ref :group))
:group (with-name :group
(chain (literal "(")
(maybe (ref :space))
(ref :choice)
(maybe (ref :space))
(literal ")")))
:literal (with-error :expected-literal
(chain (literal "'")
(with-name :literal
(with-value
(regex #"[^']*")))
(literal "'")))
:character-class (with-error :expected-character-class
(with-name :character-class
(with-value
(chain (literal "[")
(regex #"[^\]]+")
(literal "]")))))
:non-terminal (with-name :non-terminal
(with-value
(with-error :expected-non-terminal
(regex #"\w+"))))
:space (regex #"[ \t]+")})))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment