Skip to content

Instantly share code, notes, and snippets.

@jvillste
Last active July 1, 2022 06:57
Show Gist options
  • Save jvillste/746cba91fc55c86fb8b29852a83f31f6 to your computer and use it in GitHub Desktop.
Save jvillste/746cba91fc55c86fb8b29852a83f31f6 to your computer and use it in GitHub Desktop.
(ns parser-combinator
"A parser combinator library.
A parser is a function that takes a sequence of tokens and returns
all possible partial parses, or nil if the parising fails. A partial
parse is a pair of a parse and a sequence of remaining tokens. A
parse is a sequence of values or tagged values. A tagged value is a
vector containing a keyword followed by values.
A parser combinator is a function from one or more parsers to a
parser.
Use complete-parses to get only complete parses out of a parser."
(:require [clojure.test :refer [deftest is]]
[clojure.walk :as walk]))
(defn empty-to-nil [collection]
(if (empty? collection)
nil
collection))
(defn- tagged-parse [tag parse]
(if tag
[(into [tag] parse)]
parse))
(defn- create-partial-parse [tag parse remaining-tokens]
[(tagged-parse tag parse)
remaining-tokens])
(defn any
([]
(any nil))
([tag]
(fn [tokens]
(if (empty? tokens)
nil
[(create-partial-parse tag
[(first tokens)]
(rest tokens))]))))
(deftest test-any
(is (= nil
((any)
[])))
(is (= '[[[:x] (:y)]]
((any)
[:x :y])))
(is (= '[[[[:any :x]] (:y)]]
((any :any)
[:x :y]))))
(defn conforms
([predicate]
(conforms nil predicate))
([tag predicate]
(fn [tokens]
(when (predicate (first tokens))
[(create-partial-parse tag
[(first tokens)]
(rest tokens))]))))
(deftest test-conforms
(is (= '[[[[:number 1]] ()]]
((conforms :number number?)
[1])))
(is (= '[[[[:number 1]] (2)]]
((conforms :number number?)
[1 2])))
(is (= nil
((conforms :number number?)
[:x]))))
(defn repetition
([minimum-repeats maximum-repeats parser]
(repetition nil minimum-repeats maximum-repeats parser))
([tag minimum-repeats maximum-repeats parser]
(fn [tokens]
(loop [complete-parses (if (or (= 0 minimum-repeats)
(nil? minimum-repeats))
[[[] tokens]]
[])
forks []
parse []
remaining-tokens tokens
repeats 0]
;; (prn 'repeats repeats)
;; (prn 'parse parse)
;; (prn 'remaining remaining-tokens)
;; (prn 'forks forks)
(if (empty? remaining-tokens)
(if (empty? forks)
(empty-to-nil complete-parses)
(let [[parse remaining-tokens] (first forks)]
(recur complete-parses
(rest forks)
parse
remaining-tokens
repeats)))
(if (or (nil? maximum-repeats)
(< repeats maximum-repeats))
(if-let [parses (parser remaining-tokens)]
(let [[new-parse remaining-tokens] (first parses)
repeats (inc repeats)]
(recur (if (or (nil? minimum-repeats)
(<= minimum-repeats
repeats))
(conj complete-parses
(create-partial-parse tag
(concat parse new-parse)
remaining-tokens))
complete-parses)
(concat forks (rest parses))
(concat parse new-parse)
remaining-tokens
repeats))
(empty-to-nil complete-parses))
(empty-to-nil complete-parses)))))))
(deftest test-repetition
(is (= '[[[] [1 2 3]]
[[[:numbers [:number 1]]] (2 3)]
[[[:numbers [:number 1] [:number 2]]] (3)]
[[[:numbers [:number 1] [:number 2] [:number 3]]] ()]]
((repetition :numbers
nil
nil
(conforms :number
number?))
[1 2 3])))
(is (= '[[[] [1 2 3]]
[[1] (2 3)]
[[1 2] (3)]
[[1 2 3] ()]]
((repetition nil
nil
(conforms number?))
[1 2 3])))
(is (= '[[[] [1 :x]]
[[[:numbers [:number 1]]] (:x)]]
((repetition :numbers
nil
nil
(conforms :number
number?))
[1 :x])))
(is (= '[[[[:numbers [:number 1]]] (:x)]]
((repetition :numbers
1
nil
(conforms :number
number?))
[1 :x])))
(is (= '[[[[:numbers [:number 1] [:number 2]]] (3 4 5)]
[[[:numbers [:number 1] [:number 2] [:number 3]]] (4 5)]]
((repetition :numbers
2
3
(conforms :number
number?))
[1 2 3 4 5])))
(is (= [[[] [:x]]]
((repetition :numbers
nil
nil
(conforms :number
number?))
[:x])))
(is (= nil
((repetition :numbers
1
nil
(conforms :number
number?))
[:x]))))
(defn none-or-more
([parser]
(none-or-more nil parser))
([tag parser]
(repetition tag nil nil parser)))
(deftest test-none-or-more
(is (= '[[[] [1 2 3]]
[[[:numbers [:number 1]]] (2 3)]
[[[:numbers [:number 1] [:number 2]]] (3)]
[[[:numbers [:number 1] [:number 2] [:number 3]]] ()]]
((none-or-more :numbers
(conforms :number
number?))
[1 2 3])))
(is (= '[[[] [1 :x]]
[[[:numbers [:number 1]]] (:x)]]
((none-or-more :numbers
(conforms :number
number?))
[1 :x]))))
(defn one-or-more
([parser]
(one-or-more nil parser))
([tag parser]
(repetition tag 1 nil parser)))
(defn some-of
"Tries all given parsers and returns all resulting partial parses.
Call with or without tags:
(some-of :a parser-a
:b parser-b)
or
(some-of parser-a
parser-b)
"
[& parsers-or-tag-parser-pairs]
(assert (or (every? fn? parsers-or-tag-parser-pairs)
(even? (count parsers-or-tag-parser-pairs)))
"even number of tags and parsers or only parsers must be given")
(let [tag-parser-pairs (if (every? fn? parsers-or-tag-parser-pairs)
(for [parser parsers-or-tag-parser-pairs]
[nil parser])
(partition 2 parsers-or-tag-parser-pairs))]
(fn [tokens]
(loop [tag-parser-pairs tag-parser-pairs
complete-parses []]
(if-let [[tag parser] (first tag-parser-pairs)]
(if-let [parses (parser tokens)]
(recur (rest tag-parser-pairs)
(concat complete-parses
(for [[parse remaining-tokens] parses]
(create-partial-parse tag
parse
remaining-tokens))))
(recur (rest tag-parser-pairs)
complete-parses))
(empty-to-nil complete-parses))))))
(deftest test-some-of
(is (= '([[[:number 1]] ()])
((some-of :number (conforms number?)
:even-number? (conforms (fn [value]
(and (number? value)
(even? value)))))
[1])))
(is (= '([[1] ()])
((some-of (conforms number?)
(conforms keyword?))
[1])))
(is (= '([[[:number 1]] ()])
((some-of :even-number? (conforms (fn [value]
(and (number? value)
(even? value))))
:number (conforms number?))
[1])))
(is (= '([[[:number 2]] ()]
[[[:even-number? 2]] ()])
((some-of :number (conforms number?)
:even-number? (conforms (fn [value]
(and (number? value)
(even? value)))))
[2])))
(is (= '([[[:number 1]] (2)])
((some-of :number (conforms number?)
:even-number? (conforms (fn [value]
(and (number? value)
(even? value)))))
[1 2])))
(is (= nil
((some-of :number (conforms number?))
[:x :y])))
(is (= '([[[:letter "A"]] ()])
((some-of :letter (conforms (fn [value]
(and (string? value)
(= 1 (count value)))))
:number (conforms number?))
["A"])))
(is (= '([[[:number 1]] ()])
((some-of :letter (conforms (fn [value]
(and (string? value)
(= 1 (count value)))))
:number (conforms number?))
[1]))))
(defn catenation
"Applies given parsers in order. All parsers must succeed.
Call with or without tags:
(catenation :a parser-a
:b parser-b)
or
(catenation parser-a
parser-b)
"
[& parsers-or-tag-parser-pairs]
(assert (or (every? fn? parsers-or-tag-parser-pairs)
(even? (count parsers-or-tag-parser-pairs)))
"even number of tags and parsers or only parsers must be given")
(let [tag-parser-pairs (if (every? fn? parsers-or-tag-parser-pairs)
(for [parser parsers-or-tag-parser-pairs]
[nil parser])
(partition 2 parsers-or-tag-parser-pairs))]
(fn [tokens]
(loop [complete-parses []
forks []
tag-parser-pairs tag-parser-pairs
parse []
remaining-tokens tokens]
;; (prn 'parse parse)
;; (prn 'remaining remaining-tokens)
;; (prn 'forks forks)
(if-let [[tag parser] (first tag-parser-pairs)]
(if-let [parses (parser remaining-tokens)]
(let [[new-parse remaining-tokens] (first parses)]
(recur complete-parses
(concat forks
(for [[new-parse remaining-tokens] (rest parses)]
{:tag-parser-pairs (rest tag-parser-pairs)
:parse (concat parse (tagged-parse tag new-parse))
:remaining-tokens remaining-tokens}))
(rest tag-parser-pairs)
(concat parse (tagged-parse tag new-parse))
remaining-tokens))
(if (empty? forks)
(empty-to-nil complete-parses)
(let [fork (first forks)]
(recur complete-parses
(rest forks)
(:tag-parser-pairs fork)
(:parse fork)
(:remaining-tokens fork)))))
(let [complete-parses (conj complete-parses
[parse
remaining-tokens])]
(if (empty? forks)
(empty-to-nil complete-parses)
(let [fork (first forks)]
(recur complete-parses
(rest forks)
(:tag-parser-pairs fork)
(:parse fork)
(:remaining-tokens fork))))))))))
(deftest test-catenation
(is (= '[[([:number 1] [:keyword :a]) ()]]
((catenation (conforms :number number?)
(conforms :keyword keyword?))
[1 :a])))
(is (= '[[([:number 1] [:keyword :a]) ()]]
((catenation :number (conforms number?)
:keyword (conforms keyword?))
[1 :a])))
(is (= '[[([:prefix 1 2] :a) (3 :b)]
[([:prefix 1 2 :a 3] :b) ()]]
((catenation :prefix (none-or-more (any nil))
nil (conforms keyword?))
[1 2 :a 3 :b])))
(is (= nil
((catenation :number-and-keyword
(conforms :keyword keyword?))
[1]))))
(defn parse [partial-parse]
(first partial-parse))
(defn remaining-tokens [partial-parse]
(second partial-parse))
(defn- remove-underscores [parse]
(walk/prewalk (fn [value]
(if (sequential? value)
(vec (remove (fn [value]
(and (vector? value)
(= :_ (first value))))
value))
value))
parse))
(deftest test-remove-underscores
(is (= [[:a 1]]
(remove-underscores [[:a 1]])))
(is (= []
(remove-underscores [[:_ 1]])))
(is (= [[:a 1]]
(remove-underscores [[:a 1] [:_ 2]])))
(is (= [[:a [:b 1]]]
(remove-underscores [[:a [:b 1]]])))
(is (= [[:a]]
(remove-underscores [[:a [:_ 1]]]))))
(defn all-parses
"Runs the given parser and returns resulting complete or parital
parses. Removes values tagged :_ from the parses."
[parser tokens]
(->> (parser tokens)
(map parse)
(map remove-underscores)))
(defn complete-parses
"Runs the given parser and returns only parses that did not leave any
tokens unparsed. Removes values tagged :_ from the parses."
[parser tokens]
(->> (parser tokens)
(filter #(empty? (remaining-tokens %)))
(map parse)
(map remove-underscores)))
(deftest test-parsing
(is (= '((1))
(complete-parses (conforms number?)
[1])))
(is (= '([[:number 1] [:keyword :x]])
(complete-parses (catenation (conforms :number number?)
(conforms :keyword keyword?))
[1 :x])))
(is (= '([[:prefix :a] 1 [:postfix :b 2 :c]]
[[:prefix :a 1 :b] 2 [:postfix :c]])
(complete-parses (catenation (none-or-more :prefix (any))
(conforms number?)
(none-or-more :postfix (any)))
[:a 1 :b 2 :c])))
(is (= '([:street-name-word])
(complete-parses (conforms #{:street-name-word})
[:street-name-word])))
(is (= '([[:street-name :street-name-word :street-name-word]])
(complete-parses (one-or-more :street-name (conforms #{:street-name-word}))
[:street-name-word :street-name-word])))
(is (= '([[:apartment-specifiers [:number 1] [:letter "A"] [:number 2]]])
(complete-parses (one-or-more :apartment-specifiers (some-of :letter (conforms (fn [value]
(and (string? value)
(= 1 (count value)))))
:number (conforms number?)))
[1 "A" 2])))
(is (= '([[:street-name :street-name-word]
[:apartment-specifiers [:number 1] [:letter "A"] [:number 2]]])
(complete-parses (catenation :street-name (one-or-more (conforms #{:street-name-word}))
:apartment-specifiers (one-or-more (some-of :letter (conforms (fn [value]
(and (string? value)
(= 1 (count value)))))
:number (conforms number?))))
[:street-name-word 1 "A" 2])))
(let [parser (catenation (none-or-more :_ (any))
(some-of :po-box (catenation (conforms :_ #{:pl})
(conforms number?))
:street-address (catenation :street-name (one-or-more (conforms #{:street-name-word}))
:apartment-specifiers (one-or-more (some-of (conforms (fn [value]
(and (string? value)
(= 1 (count value)))))
(conforms number?)))))
(none-or-more :_ (any)))]
(is (= '([[:street-address
[:street-name :street-name-word]
[:apartment-specifiers 2]]])
(complete-parses parser [:street-name-word 2])))
(is (= '([[:street-address
[:street-name :street-name-word]
[:apartment-specifiers 3]]]
[[:street-address
[:street-name :street-name-word]
[:apartment-specifiers 1]]]
[[:street-address
[:street-name :street-name-word]
[:apartment-specifiers 1 "A"]]]
[[:street-address
[:street-name :street-name-word]
[:apartment-specifiers 1 "A" 2]]])
(complete-parses parser [1 "A" :street-name-word 1 "A" 2 :street-name-word 3])))
(is (= '([[:po-box 10]]
[[:street-address
[:street-name :street-name-word]
[:apartment-specifiers 2]]])
(complete-parses parser [1 "A" :street-name-word 2 :pl 10])))
(is (= '(([:street-address
[:street-name :street-name-word :street-name-word]
[:apartment-specifiers 2]])
([:street-address
[:street-name :street-name-word]
[:apartment-specifiers 2]]))
(complete-parses parser [1 :street-name-word :word :street-name-word :street-name-word 2 :word])))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment