Skip to content

Instantly share code, notes, and snippets.

@gebner
Created April 1, 2017 14:51
Show Gist options
  • Save gebner/b4c12bf19dfc96abead83c5864c4ef44 to your computer and use it in GitHub Desktop.
Save gebner/b4c12bf19dfc96abead83c5864c4ef44 to your computer and use it in GitHub Desktop.
Parser combinators for char_buffers
import data.buffer
universes u v
inductive parse_result (α : Type u)
| done (pos : ℕ) (result : α) : parse_result
| fail (pos : ℕ) (expected : list (unit → string)) : parse_result
def parser (α : Type u) :=
∀ (input : char_buffer) (start : ℕ), parse_result α
namespace parser
variables {α : Type u} {β : Type v}
protected def bind (p : parser α) (f : α → parser β) : parser β :=
λ input pos, match p input pos with
| parse_result.done pos a := f a input pos
| parse_result.fail ._ pos expected := parse_result.fail β pos expected
end
protected def pure (a : α) : parser α :=
λ input pos, parse_result.done pos a
private lemma id_map (p : parser α) : parser.bind p parser.pure = p :=
begin
apply funext, intro input,
apply funext, intro pos,
dunfold parser.bind,
cases (p input pos); exact rfl
end
private lemma bind_assoc {α β γ : Type u} (p : parser α) (q : α → parser β) (r : β → parser γ) :
parser.bind (parser.bind p q) r = parser.bind p (λ a, parser.bind (q a) r) :=
begin
apply funext, intro input,
apply funext, intro pos,
dunfold parser.bind,
cases (p input pos); dunfold bind,
cases (q result input pos_1); dunfold bind,
all_goals {refl}
end
protected def fail (msg : string) : parser α :=
-- TODO(gabriel): cleaner error message handling
λ _ pos, parse_result.fail α pos [λ _, msg]
instance : monad_fail parser :=
{ pure := @parser.pure,
bind := @parser.bind,
fail := @parser.fail,
id_map := @id_map,
pure_bind := λ _ _ _ _, rfl,
bind_assoc := @bind_assoc }
protected def failure : parser α :=
λ _ pos, parse_result.fail α pos []
protected def orelse (p q : parser α) : parser α :=
λ input pos, match p input pos with
| parse_result.fail ._ pos₁ expected₁ :=
if pos₁ ≠ pos then parse_result.fail _ pos₁ expected₁ else
match q input pos with
| parse_result.fail ._ pos₂ expected₂ :=
if pos₁ < pos₂ then
parse_result.fail _ pos₁ expected₁
else if pos₂ < pos₁ then
parse_result.fail _ pos₂ expected₂
else -- pos₁ = pos₂
parse_result.fail _ pos₁ (expected₁ ++ expected₂)
| ok := ok
end
| ok := ok
end
instance : alternative parser :=
{ parser.monad_fail with
failure := @parser.failure,
orelse := @parser.orelse }
-- TODO(gabriel): add generic instance for alternative
instance : inhabited (parser α) :=
⟨parser.failure⟩
def decorate_errors (msgs : thunk (list (thunk string))) (p : parser α) : parser α :=
λ input pos, match p input pos with
| parse_result.fail ._ pos' expected :=
parse_result.fail _ pos (msgs ())
| ok := ok
end
def decorate_error (msg : thunk string) (p : parser α) : parser α :=
decorate_errors [msg] p
def sat (p : char → Prop) [decidable_pred p] : parser char :=
λ input pos,
if h : pos < input.size then
let c := input.read ⟨pos, h⟩ in
if p c then
parse_result.done (pos+1) $ input.read ⟨pos, h⟩
else
parse_result.fail _ pos []
else
parse_result.fail _ pos []
def eps : parser unit := return ()
def ch (c : char) : parser unit :=
decorate_error [c] $ sat (= c) >> eps
def char_buf (s : char_buffer) : parser unit :=
decorate_error s.to_string $ monad.for' s.to_list ch
def one_of (cs : list char) : parser unit :=
decorate_errors (cs.map (λ c _, [c])) $ sat (∈ cs) >> eps
def str (s : string) : parser unit :=
decorate_error s $ monad.for' s.reverse ch
-- defined by alternative: optional, <|>
def remaining : parser ℕ :=
λ input pos, parse_result.done pos (input.size - pos)
def eof : parser unit :=
decorate_error "<end-of-file>" $
do rem ← remaining, guard $ rem = 0
def many_core (p : parser α) : ∀ (reps : ℕ), parser (list α)
| 0 := failure
| (reps+1) := (do x ← p, xs ← many_core reps, return (x::xs)) <|> return []
def many (p : parser α) : parser (list α) :=
λ input pos, many_core p (input.size - pos + 1) input pos
def many' {α : Type} (p : parser α) : parser unit :=
many p >> eps
def many1 (p : parser α) : parser (list α) :=
list.cons <$> p <*> many p
def sep_by1 {α : Type} (sep : parser unit) (p : parser α) : parser (list α) :=
list.cons <$> p <*> many (sep >> p)
def sep_by {α : Type} (sep : parser unit) (p : parser α) : parser (list α) :=
sep_by1 sep p <|> return []
def fix_core (F : parser α → parser α) : ∀ (max_depth : ℕ), parser α
| 0 := failure
| (max_depth+1) := F (fix_core max_depth)
def fix (F : parser α → parser α) : parser α :=
λ input pos, fix_core F (input.size - pos + 1) input pos
private def nl2spc : char → char | #"\n" := #" " | c := c
def mk_error_msg (input : char_buffer) (pos : ℕ) (expected : list (thunk string)) : char_buffer :=
let l := input.to_list,
left_ctx := ((l.taken pos).reverse.taken 10).reverse,
right_ctx := (l.dropn pos).taken 10,
out := (buffer.nil : char_buffer),
out := out.append_list $ left_ctx.map nl2spc,
out := out.append_list $ right_ctx.map nl2spc,
out := out.push_back #"\n",
out := out.append_list $ left_ctx.map (λ _, #" "),
out := out.push_back #"^",
out := out.push_back #"\n",
out := out.push_back #"\n",
out := out.append_string "expected: ",
out := out.append_string $ list.intercalate " | " (expected.map (λ t : thunk string, (t ()))),
out := out.push_back #"\n"
in out
def run {α : Type} (p : parser α) (input : char_buffer) : sum string α :=
match (p <* eof) input 0 with
| parse_result.done pos res := sum.inr res
| parse_result.fail ._ pos expected :=
sum.inl $ buffer.to_string $ mk_error_msg input pos expected
end
def runs {α : Type} (p : parser α) (input : string) : sum string α :=
run p (buffer.nil.append_string input)
end parser
namespace char
def is_whitespace : char → Prop
| #" " := true
| #"\t" := true
| #"\n" := true
| _ := false
instance decidable_is_whitespace : decidable_pred is_whitespace :=
begin intro c, delta is_whitespace, apply_instance end
def is_upper (c : char) : Prop :=
c.val ≥ 65 ∧ c.val ≤ 90
def is_lower (c : char) : Prop :=
c.val ≥ 97 ∧ c.val ≤ 122
def is_alpha (c : char) : Prop :=
c.is_upper ∨ c.is_lower
def is_digit (c : char) : Prop :=
let n := to_nat c in n >= 48 ∧ n <= 57
def is_alphanum (c : char) : Prop :=
is_alpha c ∨ is_digit c
def is_punctuation (c : char) : Prop :=
c ∈ [#" ", #",", #".", #"?", #"!", #";", #"-", #"'"]
def to_lower (c : char) : char :=
let n := to_nat c in
if n >= 65 ∧ n <= 90 then of_nat (n + 32) else c
-- TODO(Jeremy): automate this boilerplate
instance decidable_is_upper : decidable_pred is_upper :=
begin intro c, delta is_upper, apply_instance end
instance decidable_is_lower : decidable_pred is_lower :=
begin intro c, delta is_lower, apply_instance end
instance decidable_is_alpha : decidable_pred is_alpha :=
begin intro c, delta is_alpha, apply_instance end
instance decidable_is_digit : decidable_pred is_digit :=
begin intro c, delta is_digit, apply_instance end
instance decidable_is_alphanum : decidable_pred is_alphanum :=
begin intro c, delta is_alphanum, apply_instance end
instance decidable_is_punctuation : decidable_pred is_punctuation :=
begin intro c, delta is_punctuation, apply_instance end
end char
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment