Created
April 1, 2017 14:51
-
-
Save gebner/b4c12bf19dfc96abead83c5864c4ef44 to your computer and use it in GitHub Desktop.
Parser combinators for char_buffers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import data.buffer | |
universes u v | |
inductive parse_result (α : Type u) | |
| done (pos : ℕ) (result : α) : parse_result | |
| fail (pos : ℕ) (expected : list (unit → string)) : parse_result | |
def parser (α : Type u) := | |
∀ (input : char_buffer) (start : ℕ), parse_result α | |
namespace parser | |
variables {α : Type u} {β : Type v} | |
protected def bind (p : parser α) (f : α → parser β) : parser β := | |
λ input pos, match p input pos with | |
| parse_result.done pos a := f a input pos | |
| parse_result.fail ._ pos expected := parse_result.fail β pos expected | |
end | |
protected def pure (a : α) : parser α := | |
λ input pos, parse_result.done pos a | |
private lemma id_map (p : parser α) : parser.bind p parser.pure = p := | |
begin | |
apply funext, intro input, | |
apply funext, intro pos, | |
dunfold parser.bind, | |
cases (p input pos); exact rfl | |
end | |
private lemma bind_assoc {α β γ : Type u} (p : parser α) (q : α → parser β) (r : β → parser γ) : | |
parser.bind (parser.bind p q) r = parser.bind p (λ a, parser.bind (q a) r) := | |
begin | |
apply funext, intro input, | |
apply funext, intro pos, | |
dunfold parser.bind, | |
cases (p input pos); dunfold bind, | |
cases (q result input pos_1); dunfold bind, | |
all_goals {refl} | |
end | |
protected def fail (msg : string) : parser α := | |
-- TODO(gabriel): cleaner error message handling | |
λ _ pos, parse_result.fail α pos [λ _, msg] | |
instance : monad_fail parser := | |
{ pure := @parser.pure, | |
bind := @parser.bind, | |
fail := @parser.fail, | |
id_map := @id_map, | |
pure_bind := λ _ _ _ _, rfl, | |
bind_assoc := @bind_assoc } | |
protected def failure : parser α := | |
λ _ pos, parse_result.fail α pos [] | |
protected def orelse (p q : parser α) : parser α := | |
λ input pos, match p input pos with | |
| parse_result.fail ._ pos₁ expected₁ := | |
if pos₁ ≠ pos then parse_result.fail _ pos₁ expected₁ else | |
match q input pos with | |
| parse_result.fail ._ pos₂ expected₂ := | |
if pos₁ < pos₂ then | |
parse_result.fail _ pos₁ expected₁ | |
else if pos₂ < pos₁ then | |
parse_result.fail _ pos₂ expected₂ | |
else -- pos₁ = pos₂ | |
parse_result.fail _ pos₁ (expected₁ ++ expected₂) | |
| ok := ok | |
end | |
| ok := ok | |
end | |
instance : alternative parser := | |
{ parser.monad_fail with | |
failure := @parser.failure, | |
orelse := @parser.orelse } | |
-- TODO(gabriel): add generic instance for alternative | |
instance : inhabited (parser α) := | |
⟨parser.failure⟩ | |
def decorate_errors (msgs : thunk (list (thunk string))) (p : parser α) : parser α := | |
λ input pos, match p input pos with | |
| parse_result.fail ._ pos' expected := | |
parse_result.fail _ pos (msgs ()) | |
| ok := ok | |
end | |
def decorate_error (msg : thunk string) (p : parser α) : parser α := | |
decorate_errors [msg] p | |
def sat (p : char → Prop) [decidable_pred p] : parser char := | |
λ input pos, | |
if h : pos < input.size then | |
let c := input.read ⟨pos, h⟩ in | |
if p c then | |
parse_result.done (pos+1) $ input.read ⟨pos, h⟩ | |
else | |
parse_result.fail _ pos [] | |
else | |
parse_result.fail _ pos [] | |
def eps : parser unit := return () | |
def ch (c : char) : parser unit := | |
decorate_error [c] $ sat (= c) >> eps | |
def char_buf (s : char_buffer) : parser unit := | |
decorate_error s.to_string $ monad.for' s.to_list ch | |
def one_of (cs : list char) : parser unit := | |
decorate_errors (cs.map (λ c _, [c])) $ sat (∈ cs) >> eps | |
def str (s : string) : parser unit := | |
decorate_error s $ monad.for' s.reverse ch | |
-- defined by alternative: optional, <|> | |
def remaining : parser ℕ := | |
λ input pos, parse_result.done pos (input.size - pos) | |
def eof : parser unit := | |
decorate_error "<end-of-file>" $ | |
do rem ← remaining, guard $ rem = 0 | |
def many_core (p : parser α) : ∀ (reps : ℕ), parser (list α) | |
| 0 := failure | |
| (reps+1) := (do x ← p, xs ← many_core reps, return (x::xs)) <|> return [] | |
def many (p : parser α) : parser (list α) := | |
λ input pos, many_core p (input.size - pos + 1) input pos | |
def many' {α : Type} (p : parser α) : parser unit := | |
many p >> eps | |
def many1 (p : parser α) : parser (list α) := | |
list.cons <$> p <*> many p | |
def sep_by1 {α : Type} (sep : parser unit) (p : parser α) : parser (list α) := | |
list.cons <$> p <*> many (sep >> p) | |
def sep_by {α : Type} (sep : parser unit) (p : parser α) : parser (list α) := | |
sep_by1 sep p <|> return [] | |
def fix_core (F : parser α → parser α) : ∀ (max_depth : ℕ), parser α | |
| 0 := failure | |
| (max_depth+1) := F (fix_core max_depth) | |
def fix (F : parser α → parser α) : parser α := | |
λ input pos, fix_core F (input.size - pos + 1) input pos | |
private def nl2spc : char → char | #"\n" := #" " | c := c | |
def mk_error_msg (input : char_buffer) (pos : ℕ) (expected : list (thunk string)) : char_buffer := | |
let l := input.to_list, | |
left_ctx := ((l.taken pos).reverse.taken 10).reverse, | |
right_ctx := (l.dropn pos).taken 10, | |
out := (buffer.nil : char_buffer), | |
out := out.append_list $ left_ctx.map nl2spc, | |
out := out.append_list $ right_ctx.map nl2spc, | |
out := out.push_back #"\n", | |
out := out.append_list $ left_ctx.map (λ _, #" "), | |
out := out.push_back #"^", | |
out := out.push_back #"\n", | |
out := out.push_back #"\n", | |
out := out.append_string "expected: ", | |
out := out.append_string $ list.intercalate " | " (expected.map (λ t : thunk string, (t ()))), | |
out := out.push_back #"\n" | |
in out | |
def run {α : Type} (p : parser α) (input : char_buffer) : sum string α := | |
match (p <* eof) input 0 with | |
| parse_result.done pos res := sum.inr res | |
| parse_result.fail ._ pos expected := | |
sum.inl $ buffer.to_string $ mk_error_msg input pos expected | |
end | |
def runs {α : Type} (p : parser α) (input : string) : sum string α := | |
run p (buffer.nil.append_string input) | |
end parser | |
namespace char | |
def is_whitespace : char → Prop | |
| #" " := true | |
| #"\t" := true | |
| #"\n" := true | |
| _ := false | |
instance decidable_is_whitespace : decidable_pred is_whitespace := | |
begin intro c, delta is_whitespace, apply_instance end | |
def is_upper (c : char) : Prop := | |
c.val ≥ 65 ∧ c.val ≤ 90 | |
def is_lower (c : char) : Prop := | |
c.val ≥ 97 ∧ c.val ≤ 122 | |
def is_alpha (c : char) : Prop := | |
c.is_upper ∨ c.is_lower | |
def is_digit (c : char) : Prop := | |
let n := to_nat c in n >= 48 ∧ n <= 57 | |
def is_alphanum (c : char) : Prop := | |
is_alpha c ∨ is_digit c | |
def is_punctuation (c : char) : Prop := | |
c ∈ [#" ", #",", #".", #"?", #"!", #";", #"-", #"'"] | |
def to_lower (c : char) : char := | |
let n := to_nat c in | |
if n >= 65 ∧ n <= 90 then of_nat (n + 32) else c | |
-- TODO(Jeremy): automate this boilerplate | |
instance decidable_is_upper : decidable_pred is_upper := | |
begin intro c, delta is_upper, apply_instance end | |
instance decidable_is_lower : decidable_pred is_lower := | |
begin intro c, delta is_lower, apply_instance end | |
instance decidable_is_alpha : decidable_pred is_alpha := | |
begin intro c, delta is_alpha, apply_instance end | |
instance decidable_is_digit : decidable_pred is_digit := | |
begin intro c, delta is_digit, apply_instance end | |
instance decidable_is_alphanum : decidable_pred is_alphanum := | |
begin intro c, delta is_alphanum, apply_instance end | |
instance decidable_is_punctuation : decidable_pred is_punctuation := | |
begin intro c, delta is_punctuation, apply_instance end | |
end char |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment