Last active
July 1, 2022 06:57
-
-
Save jvillste/746cba91fc55c86fb8b29852a83f31f6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns parser-combinator | |
"A parser combinator library. | |
A parser is a function that takes a sequence of tokens and returns | |
all possible partial parses, or nil if the parising fails. A partial | |
parse is a pair of a parse and a sequence of remaining tokens. A | |
parse is a sequence of values or tagged values. A tagged value is a | |
vector containing a keyword followed by values. | |
A parser combinator is a function from one or more parsers to a | |
parser. | |
Use complete-parses to get only complete parses out of a parser." | |
(:require [clojure.test :refer [deftest is]] | |
[clojure.walk :as walk])) | |
(defn empty-to-nil [collection] | |
(if (empty? collection) | |
nil | |
collection)) | |
(defn- tagged-parse [tag parse] | |
(if tag | |
[(into [tag] parse)] | |
parse)) | |
(defn- create-partial-parse [tag parse remaining-tokens] | |
[(tagged-parse tag parse) | |
remaining-tokens]) | |
(defn any | |
([] | |
(any nil)) | |
([tag] | |
(fn [tokens] | |
(if (empty? tokens) | |
nil | |
[(create-partial-parse tag | |
[(first tokens)] | |
(rest tokens))])))) | |
(deftest test-any | |
(is (= nil | |
((any) | |
[]))) | |
(is (= '[[[:x] (:y)]] | |
((any) | |
[:x :y]))) | |
(is (= '[[[[:any :x]] (:y)]] | |
((any :any) | |
[:x :y])))) | |
(defn conforms | |
([predicate] | |
(conforms nil predicate)) | |
([tag predicate] | |
(fn [tokens] | |
(when (predicate (first tokens)) | |
[(create-partial-parse tag | |
[(first tokens)] | |
(rest tokens))])))) | |
(deftest test-conforms | |
(is (= '[[[[:number 1]] ()]] | |
((conforms :number number?) | |
[1]))) | |
(is (= '[[[[:number 1]] (2)]] | |
((conforms :number number?) | |
[1 2]))) | |
(is (= nil | |
((conforms :number number?) | |
[:x])))) | |
(defn repetition | |
([minimum-repeats maximum-repeats parser] | |
(repetition nil minimum-repeats maximum-repeats parser)) | |
([tag minimum-repeats maximum-repeats parser] | |
(fn [tokens] | |
(loop [complete-parses (if (or (= 0 minimum-repeats) | |
(nil? minimum-repeats)) | |
[[[] tokens]] | |
[]) | |
forks [] | |
parse [] | |
remaining-tokens tokens | |
repeats 0] | |
;; (prn 'repeats repeats) | |
;; (prn 'parse parse) | |
;; (prn 'remaining remaining-tokens) | |
;; (prn 'forks forks) | |
(if (empty? remaining-tokens) | |
(if (empty? forks) | |
(empty-to-nil complete-parses) | |
(let [[parse remaining-tokens] (first forks)] | |
(recur complete-parses | |
(rest forks) | |
parse | |
remaining-tokens | |
repeats))) | |
(if (or (nil? maximum-repeats) | |
(< repeats maximum-repeats)) | |
(if-let [parses (parser remaining-tokens)] | |
(let [[new-parse remaining-tokens] (first parses) | |
repeats (inc repeats)] | |
(recur (if (or (nil? minimum-repeats) | |
(<= minimum-repeats | |
repeats)) | |
(conj complete-parses | |
(create-partial-parse tag | |
(concat parse new-parse) | |
remaining-tokens)) | |
complete-parses) | |
(concat forks (rest parses)) | |
(concat parse new-parse) | |
remaining-tokens | |
repeats)) | |
(empty-to-nil complete-parses)) | |
(empty-to-nil complete-parses))))))) | |
(deftest test-repetition | |
(is (= '[[[] [1 2 3]] | |
[[[:numbers [:number 1]]] (2 3)] | |
[[[:numbers [:number 1] [:number 2]]] (3)] | |
[[[:numbers [:number 1] [:number 2] [:number 3]]] ()]] | |
((repetition :numbers | |
nil | |
nil | |
(conforms :number | |
number?)) | |
[1 2 3]))) | |
(is (= '[[[] [1 2 3]] | |
[[1] (2 3)] | |
[[1 2] (3)] | |
[[1 2 3] ()]] | |
((repetition nil | |
nil | |
(conforms number?)) | |
[1 2 3]))) | |
(is (= '[[[] [1 :x]] | |
[[[:numbers [:number 1]]] (:x)]] | |
((repetition :numbers | |
nil | |
nil | |
(conforms :number | |
number?)) | |
[1 :x]))) | |
(is (= '[[[[:numbers [:number 1]]] (:x)]] | |
((repetition :numbers | |
1 | |
nil | |
(conforms :number | |
number?)) | |
[1 :x]))) | |
(is (= '[[[[:numbers [:number 1] [:number 2]]] (3 4 5)] | |
[[[:numbers [:number 1] [:number 2] [:number 3]]] (4 5)]] | |
((repetition :numbers | |
2 | |
3 | |
(conforms :number | |
number?)) | |
[1 2 3 4 5]))) | |
(is (= [[[] [:x]]] | |
((repetition :numbers | |
nil | |
nil | |
(conforms :number | |
number?)) | |
[:x]))) | |
(is (= nil | |
((repetition :numbers | |
1 | |
nil | |
(conforms :number | |
number?)) | |
[:x])))) | |
(defn none-or-more | |
([parser] | |
(none-or-more nil parser)) | |
([tag parser] | |
(repetition tag nil nil parser))) | |
(deftest test-none-or-more | |
(is (= '[[[] [1 2 3]] | |
[[[:numbers [:number 1]]] (2 3)] | |
[[[:numbers [:number 1] [:number 2]]] (3)] | |
[[[:numbers [:number 1] [:number 2] [:number 3]]] ()]] | |
((none-or-more :numbers | |
(conforms :number | |
number?)) | |
[1 2 3]))) | |
(is (= '[[[] [1 :x]] | |
[[[:numbers [:number 1]]] (:x)]] | |
((none-or-more :numbers | |
(conforms :number | |
number?)) | |
[1 :x])))) | |
(defn one-or-more | |
([parser] | |
(one-or-more nil parser)) | |
([tag parser] | |
(repetition tag 1 nil parser))) | |
(defn some-of | |
"Tries all given parsers and returns all resulting partial parses. | |
Call with or without tags: | |
(some-of :a parser-a | |
:b parser-b) | |
or | |
(some-of parser-a | |
parser-b) | |
" | |
[& parsers-or-tag-parser-pairs] | |
(assert (or (every? fn? parsers-or-tag-parser-pairs) | |
(even? (count parsers-or-tag-parser-pairs))) | |
"even number of tags and parsers or only parsers must be given") | |
(let [tag-parser-pairs (if (every? fn? parsers-or-tag-parser-pairs) | |
(for [parser parsers-or-tag-parser-pairs] | |
[nil parser]) | |
(partition 2 parsers-or-tag-parser-pairs))] | |
(fn [tokens] | |
(loop [tag-parser-pairs tag-parser-pairs | |
complete-parses []] | |
(if-let [[tag parser] (first tag-parser-pairs)] | |
(if-let [parses (parser tokens)] | |
(recur (rest tag-parser-pairs) | |
(concat complete-parses | |
(for [[parse remaining-tokens] parses] | |
(create-partial-parse tag | |
parse | |
remaining-tokens)))) | |
(recur (rest tag-parser-pairs) | |
complete-parses)) | |
(empty-to-nil complete-parses)))))) | |
(deftest test-some-of | |
(is (= '([[[:number 1]] ()]) | |
((some-of :number (conforms number?) | |
:even-number? (conforms (fn [value] | |
(and (number? value) | |
(even? value))))) | |
[1]))) | |
(is (= '([[1] ()]) | |
((some-of (conforms number?) | |
(conforms keyword?)) | |
[1]))) | |
(is (= '([[[:number 1]] ()]) | |
((some-of :even-number? (conforms (fn [value] | |
(and (number? value) | |
(even? value)))) | |
:number (conforms number?)) | |
[1]))) | |
(is (= '([[[:number 2]] ()] | |
[[[:even-number? 2]] ()]) | |
((some-of :number (conforms number?) | |
:even-number? (conforms (fn [value] | |
(and (number? value) | |
(even? value))))) | |
[2]))) | |
(is (= '([[[:number 1]] (2)]) | |
((some-of :number (conforms number?) | |
:even-number? (conforms (fn [value] | |
(and (number? value) | |
(even? value))))) | |
[1 2]))) | |
(is (= nil | |
((some-of :number (conforms number?)) | |
[:x :y]))) | |
(is (= '([[[:letter "A"]] ()]) | |
((some-of :letter (conforms (fn [value] | |
(and (string? value) | |
(= 1 (count value))))) | |
:number (conforms number?)) | |
["A"]))) | |
(is (= '([[[:number 1]] ()]) | |
((some-of :letter (conforms (fn [value] | |
(and (string? value) | |
(= 1 (count value))))) | |
:number (conforms number?)) | |
[1])))) | |
(defn catenation | |
"Applies given parsers in order. All parsers must succeed. | |
Call with or without tags: | |
(catenation :a parser-a | |
:b parser-b) | |
or | |
(catenation parser-a | |
parser-b) | |
" | |
[& parsers-or-tag-parser-pairs] | |
(assert (or (every? fn? parsers-or-tag-parser-pairs) | |
(even? (count parsers-or-tag-parser-pairs))) | |
"even number of tags and parsers or only parsers must be given") | |
(let [tag-parser-pairs (if (every? fn? parsers-or-tag-parser-pairs) | |
(for [parser parsers-or-tag-parser-pairs] | |
[nil parser]) | |
(partition 2 parsers-or-tag-parser-pairs))] | |
(fn [tokens] | |
(loop [complete-parses [] | |
forks [] | |
tag-parser-pairs tag-parser-pairs | |
parse [] | |
remaining-tokens tokens] | |
;; (prn 'parse parse) | |
;; (prn 'remaining remaining-tokens) | |
;; (prn 'forks forks) | |
(if-let [[tag parser] (first tag-parser-pairs)] | |
(if-let [parses (parser remaining-tokens)] | |
(let [[new-parse remaining-tokens] (first parses)] | |
(recur complete-parses | |
(concat forks | |
(for [[new-parse remaining-tokens] (rest parses)] | |
{:tag-parser-pairs (rest tag-parser-pairs) | |
:parse (concat parse (tagged-parse tag new-parse)) | |
:remaining-tokens remaining-tokens})) | |
(rest tag-parser-pairs) | |
(concat parse (tagged-parse tag new-parse)) | |
remaining-tokens)) | |
(if (empty? forks) | |
(empty-to-nil complete-parses) | |
(let [fork (first forks)] | |
(recur complete-parses | |
(rest forks) | |
(:tag-parser-pairs fork) | |
(:parse fork) | |
(:remaining-tokens fork))))) | |
(let [complete-parses (conj complete-parses | |
[parse | |
remaining-tokens])] | |
(if (empty? forks) | |
(empty-to-nil complete-parses) | |
(let [fork (first forks)] | |
(recur complete-parses | |
(rest forks) | |
(:tag-parser-pairs fork) | |
(:parse fork) | |
(:remaining-tokens fork)))))))))) | |
(deftest test-catenation | |
(is (= '[[([:number 1] [:keyword :a]) ()]] | |
((catenation (conforms :number number?) | |
(conforms :keyword keyword?)) | |
[1 :a]))) | |
(is (= '[[([:number 1] [:keyword :a]) ()]] | |
((catenation :number (conforms number?) | |
:keyword (conforms keyword?)) | |
[1 :a]))) | |
(is (= '[[([:prefix 1 2] :a) (3 :b)] | |
[([:prefix 1 2 :a 3] :b) ()]] | |
((catenation :prefix (none-or-more (any nil)) | |
nil (conforms keyword?)) | |
[1 2 :a 3 :b]))) | |
(is (= nil | |
((catenation :number-and-keyword | |
(conforms :keyword keyword?)) | |
[1])))) | |
(defn parse [partial-parse] | |
(first partial-parse)) | |
(defn remaining-tokens [partial-parse] | |
(second partial-parse)) | |
(defn- remove-underscores [parse] | |
(walk/prewalk (fn [value] | |
(if (sequential? value) | |
(vec (remove (fn [value] | |
(and (vector? value) | |
(= :_ (first value)))) | |
value)) | |
value)) | |
parse)) | |
(deftest test-remove-underscores | |
(is (= [[:a 1]] | |
(remove-underscores [[:a 1]]))) | |
(is (= [] | |
(remove-underscores [[:_ 1]]))) | |
(is (= [[:a 1]] | |
(remove-underscores [[:a 1] [:_ 2]]))) | |
(is (= [[:a [:b 1]]] | |
(remove-underscores [[:a [:b 1]]]))) | |
(is (= [[:a]] | |
(remove-underscores [[:a [:_ 1]]])))) | |
(defn all-parses | |
"Runs the given parser and returns resulting complete or parital | |
parses. Removes values tagged :_ from the parses." | |
[parser tokens] | |
(->> (parser tokens) | |
(map parse) | |
(map remove-underscores))) | |
(defn complete-parses | |
"Runs the given parser and returns only parses that did not leave any | |
tokens unparsed. Removes values tagged :_ from the parses." | |
[parser tokens] | |
(->> (parser tokens) | |
(filter #(empty? (remaining-tokens %))) | |
(map parse) | |
(map remove-underscores))) | |
(deftest test-parsing | |
(is (= '((1)) | |
(complete-parses (conforms number?) | |
[1]))) | |
(is (= '([[:number 1] [:keyword :x]]) | |
(complete-parses (catenation (conforms :number number?) | |
(conforms :keyword keyword?)) | |
[1 :x]))) | |
(is (= '([[:prefix :a] 1 [:postfix :b 2 :c]] | |
[[:prefix :a 1 :b] 2 [:postfix :c]]) | |
(complete-parses (catenation (none-or-more :prefix (any)) | |
(conforms number?) | |
(none-or-more :postfix (any))) | |
[:a 1 :b 2 :c]))) | |
(is (= '([:street-name-word]) | |
(complete-parses (conforms #{:street-name-word}) | |
[:street-name-word]))) | |
(is (= '([[:street-name :street-name-word :street-name-word]]) | |
(complete-parses (one-or-more :street-name (conforms #{:street-name-word})) | |
[:street-name-word :street-name-word]))) | |
(is (= '([[:apartment-specifiers [:number 1] [:letter "A"] [:number 2]]]) | |
(complete-parses (one-or-more :apartment-specifiers (some-of :letter (conforms (fn [value] | |
(and (string? value) | |
(= 1 (count value))))) | |
:number (conforms number?))) | |
[1 "A" 2]))) | |
(is (= '([[:street-name :street-name-word] | |
[:apartment-specifiers [:number 1] [:letter "A"] [:number 2]]]) | |
(complete-parses (catenation :street-name (one-or-more (conforms #{:street-name-word})) | |
:apartment-specifiers (one-or-more (some-of :letter (conforms (fn [value] | |
(and (string? value) | |
(= 1 (count value))))) | |
:number (conforms number?)))) | |
[:street-name-word 1 "A" 2]))) | |
(let [parser (catenation (none-or-more :_ (any)) | |
(some-of :po-box (catenation (conforms :_ #{:pl}) | |
(conforms number?)) | |
:street-address (catenation :street-name (one-or-more (conforms #{:street-name-word})) | |
:apartment-specifiers (one-or-more (some-of (conforms (fn [value] | |
(and (string? value) | |
(= 1 (count value))))) | |
(conforms number?))))) | |
(none-or-more :_ (any)))] | |
(is (= '([[:street-address | |
[:street-name :street-name-word] | |
[:apartment-specifiers 2]]]) | |
(complete-parses parser [:street-name-word 2]))) | |
(is (= '([[:street-address | |
[:street-name :street-name-word] | |
[:apartment-specifiers 3]]] | |
[[:street-address | |
[:street-name :street-name-word] | |
[:apartment-specifiers 1]]] | |
[[:street-address | |
[:street-name :street-name-word] | |
[:apartment-specifiers 1 "A"]]] | |
[[:street-address | |
[:street-name :street-name-word] | |
[:apartment-specifiers 1 "A" 2]]]) | |
(complete-parses parser [1 "A" :street-name-word 1 "A" 2 :street-name-word 3]))) | |
(is (= '([[:po-box 10]] | |
[[:street-address | |
[:street-name :street-name-word] | |
[:apartment-specifiers 2]]]) | |
(complete-parses parser [1 "A" :street-name-word 2 :pl 10]))) | |
(is (= '(([:street-address | |
[:street-name :street-name-word :street-name-word] | |
[:apartment-specifiers 2]]) | |
([:street-address | |
[:street-name :street-name-word] | |
[:apartment-specifiers 2]])) | |
(complete-parses parser [1 :street-name-word :word :street-name-word :street-name-word 2 :word]))))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment