Last active
July 25, 2022 04:52
-
-
Save tydeu/718aa65f8a36b9d9e1ab210a66c81b29 to your computer and use it in GitHub Desktop.
Lean 4 example of compiling a ParserDescr to a Parsec parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/- | |
Copyright (c) 2021 Mac Malone. All rights reserved. | |
Released under the MIT license. | |
Authors: Mac Malone | |
-/ | |
import Lean.Data.Parsec | |
import Lean.Elab.ElabRules | |
import Lean.Parser | |
open Lean Syntax Parser Elab Command Parsec | |
-------------------------------------------------------------------------------- | |
-- # General Parsec Helpers | |
-------------------------------------------------------------------------------- | |
instance : MonadReaderOf String.Iterator Parsec where | |
read := fun it => .success it it | |
@[inline] def Lean.Parsec.run (s : String) (p : Parsec α) : Except String α := | |
match p s.mkIterator with | |
| .success _ a => .ok a | |
| .error _ e => .error e | |
@[inline] def Parsec.option (p : Parsec α) : Parsec (Option α) := fun it => | |
match p it with | |
| .success it a => .success it (some a) | |
| .error _ _ => .success it none | |
@[inline] def Parsec.matched (p : Parsec α) : Parsec Bool := fun it => | |
match p it with | |
| .success it _ => .success it true | |
| .error _ _ => .success it false | |
@[inline] def Parsec.withString (p : Parsec α) : Parsec (String × α) := fun it => | |
match p it with | |
| .success it' a => .success it' (it.extract it', a) | |
| .error it' e => .error it' e | |
@[inline] def Parsec.extract (p : Parsec PUnit) : Parsec String := | |
(·.1) <$> withString p | |
@[inline] def Parsec.withSubstring (p : Parsec α) : Parsec (Substring × α) := fun it => | |
match p it with | |
| .success it' a => .success it' <| | |
(it.toString.toSubstring.extract it.pos it'.pos, a) | |
| .error it' e => .error it' e | |
@[inline] def Parsec.extractSub (p : Parsec PUnit) : Parsec Substring := | |
(·.1) <$> withSubstring p | |
@[macroInline] def Parsec.orelse (p1 p2 : Parsec α) : Parsec α := | |
(attempt p1) <|> p2 | |
@[macroInline] partial def Parsec.skipSatisfy | |
(pred : Char → Bool) (errMsg := "condition not satisfied") : Parsec PUnit := do | |
unless pred (← anyChar) do fail errMsg | |
@[inline] partial def Parsec.skipMany (p : Parsec α) : Parsec PUnit := do | |
if (← matched p) then skipMany p else pure () | |
@[macroInline] def Parsec.skipMany1 (p : Parsec α) : Parsec PUnit := | |
discard p *> skipMany p | |
@[inline] def bit : Parsec Char := attempt do | |
let c ← anyChar; if c = '0' ∨ c ≤ '1' then return c else fail s!"bit expected" | |
@[inline] def octDigit : Parsec Char := attempt do | |
let c ← anyChar; if '0' ≤ c ∧ c ≤ '7' then return c else fail s!"octal digit expected" | |
-------------------------------------------------------------------------------- | |
-- # Syntax-specific Parsec Helpers | |
-------------------------------------------------------------------------------- | |
instance : Coe (Parsec (TSyntax k)) (Parsec Syntax) where | |
coe x := x | |
instance : Coe (Parsec Syntax) (Parsec (Array Syntax)) where | |
coe x := Array.singleton <$> x | |
instance : Coe (Parsec (Array (TSyntax k))) (Parsec (Array Syntax)) where | |
coe x := x | |
def Parsec.withWsInfo (p : Parsec α) : Parsec (SourceInfo × α) := do | |
let leading ← extractSub ws | |
let start ← read | |
let a ← p | |
let stop ← read | |
let trailing ← extractSub ws | |
let info := .original leading start.pos trailing stop.pos | |
return (info, a) | |
def Parsec.atomicId : Parsec String := do | |
if (← matched <| satisfy isIdBeginEscape) then | |
extract do repeat if (← matched <| satisfy isIdEndEscape) then break | |
else | |
extract do | |
skipSatisfy isIdFirst | |
skipMany <| skipSatisfy isIdRest | |
def Parsec.name : Parsec Name := do | |
let mut n := Name.anonymous | |
repeat | |
let s ← atomicId | |
n := Name.mkStr n s | |
until !(← matched <| skipChar '.') | |
return n | |
def Parsec.ident : Parsec Ident := do | |
let (info, rawVal, val) ← withWsInfo <| withSubstring <| name | |
return ⟨.ident info rawVal val []⟩ | |
def Parsec.atomOf (p : Parsec PUnit) : Parsec Syntax := do | |
let (info, val) ← withWsInfo <| extract p | |
return .atom info val | |
abbrev TAtom (val : String) := TSyntax (Name.str .anonymous val) | |
def Parsec.atom (val : String) : Parsec (TAtom val) := | |
(⟨·⟩) <$> atomOf (skipString val) | |
def Parsec.decimal : Parsec Syntax := atomOf do | |
skipMany digit | |
def Parsec.node (kind : SyntaxNodeKind) (p : Parsec (Array Syntax)) : Parsec (TSyntax kind) := do | |
return ⟨.node SourceInfo.none kind (← p)⟩ | |
def Parsec.num : Parsec NumLit := | |
node numLitKind <| Array.singleton <$> atomOf do | |
let c ← satisfy fun c => '0' ≤ c ∧ c ≤ '9' | |
if c = '0' then | |
match (← anyChar) with | |
| 'b' => skipMany1 hexDigit | |
| 'o' => skipMany1 octDigit | |
| 'x' => skipMany1 bit | |
| _ => fail "numeral expected" | |
else | |
skipMany digit | |
def Parsec.str : Parsec StrLit := | |
node strLitKind <| Array.singleton <$> atomOf do | |
skipChar '"' | |
repeat | |
match (← anyChar) with | |
| '\\' => skip | |
| '"' => break | |
| _ => continue | |
class ParsecOrElse (α : Type) (β : Type) (γ : outParam Type) where | |
pOrElse : Parsec α → Parsec β → Parsec γ | |
instance : ParsecOrElse Syntax Syntax Syntax where | |
pOrElse p1 p2 := Parsec.orelse p1 p2 | |
instance : Append SyntaxNodeKinds := inferInstanceAs (Append (List SyntaxNodeKind)) | |
instance : ParsecOrElse (TSyntax k1) (TSyntax k2) (TSyntax (k1 ++ k2)) where | |
pOrElse p1 p2 := Parsec.orelse ((⟨·.raw⟩) <$> p1) ((⟨·.raw⟩) <$> p2) | |
instance : ParsecOrElse (Array (TSyntax k1)) (TSyntax k2) (Array (TSyntax (k1 ++ k2))) where | |
pOrElse p1 p2 := Parsec.orelse | |
((·.map (⟨·.raw⟩)) <$> p1) | |
((fun x => Array.singleton ⟨x.raw⟩) <$> p2) | |
instance : ParsecOrElse (TSyntax k1) (Array (TSyntax k2)) (Array (TSyntax (k1 ++ k2))) where | |
pOrElse p1 p2 := Parsec.orelse | |
((fun x => Array.singleton ⟨x.raw⟩) <$> p1) | |
((·.map (⟨·.raw⟩)) <$> p2) | |
def Parsec.andthen (p1 p2 : Parsec (Array Syntax)) : Parsec (Array Syntax) := do | |
let a1 ← p1 | |
let a2 ← p2 | |
return a1 ++ a2 | |
def Parsec.optional (p : Parsec (Array Syntax)) : Parsec Syntax := fun it => | |
match p it with | |
| .success it args => .success it (mkNullNode args) | |
| .error _ _ => .success it mkNullNode | |
def Parsec.sepByTrailing (initArgs : Array Syntax) | |
(p : Parsec Syntax) (sep : Parsec Syntax) : Parsec Syntax := do | |
let mut args := initArgs | |
repeat if let some a ← option p then | |
args := args.push a | |
if let some s ← option sep then | |
args := args.push s | |
else | |
break | |
return mkNullNode args | |
def Parsec.sepBy1NoTrailing (initArgs : Array Syntax) | |
(p : Parsec Syntax) (sep : Parsec Syntax) : Parsec Syntax := do | |
let mut args := initArgs | |
repeat | |
let a ← p | |
args := args.push a | |
if let some s ← option sep then | |
args := args.push s | |
else | |
break | |
return mkNullNode args | |
def Parsec.sepBy (p : Parsec Syntax) (sep : Parsec Syntax) (allowTrailingSep : Bool) : Parsec Syntax := do | |
if allowTrailingSep then | |
sepByTrailing #[] p sep | |
else if let some a ← option p then | |
if let some s ← option sep then | |
sepBy1NoTrailing #[a, s] p sep | |
else | |
return mkNullNode #[a] | |
else | |
return mkNullNode | |
def Parsec.sepBy1 (p : Parsec Syntax) (sep : Parsec Syntax) (allowTrailingSep : Bool) : Parsec Syntax := do | |
let a ← p | |
if let some s ← option sep then | |
if allowTrailingSep then | |
sepByTrailing #[a, s] p sep | |
else | |
return mkNullNode #[a] | |
else | |
sepBy1NoTrailing #[] p sep | |
-- Just for testing purposes | |
def Parsec.term : Parsec Syntax := do | |
ident <|> num <|> str | |
-------------------------------------------------------------------------------- | |
-- # Compilation Command | |
-------------------------------------------------------------------------------- | |
def aliases := | |
({} : NameMap Name) | |
|>.insert `orelse ``ParsecOrElse.pOrElse | |
|>.insert `andthen ``Parsec.andthen | |
|>.insert `optional ``Parsec.optional | |
|>.insert `many ``Parsec.many | |
|>.insert `many1 ``Parsec.many1 | |
|>.insert `num ``Parsec.num | |
|>.insert `ident ``Parsec.ident | |
|>.insert `term ``Parsec.term | |
|>.insert `decimal ``Parsec.decimal | |
def compiledName (p : Name) : Name := | |
.str p "parsec" | |
instance : Coe Name Ident where | |
coe := mkIdent | |
elab "if_not_def " id:ident cmds:many1Indent(command) : command => do | |
let id := id.getId.replacePrefix rootNamespace Name.anonymous | |
unless (← getEnv).contains id do | |
withMacroExpansion (← getRef) cmds <| elabCommand cmds | |
syntax "compile_parser_descr " ident : command | |
def compileParserDescr (root : Name) : ParserDescr → MacroM Command | |
| .const name => do | |
let some alias := aliases.find? name | |
| Macro.throwError s!"no parser alias defined for alias `{name}`" | |
`(abbrev $root := $alias) | |
| .unary name p => do | |
let some alias := aliases.find? name | |
| Macro.throwError s!"no parser alias defined for alias `{name}`" | |
let pName := Name.str root "op" | |
`( | |
$(← compileParserDescr pName p) | |
@[inline] def $root := $alias ↑$pName | |
) | |
| .binary name p1 p2 => do | |
let some alias := aliases.find? name | |
| Macro.throwError s!"no parser alias defined for alias `{name}`" | |
let p1Name := Name.str root "lhs" | |
let p2Name := Name.str root "rhs" | |
`( | |
$(← compileParserDescr p1Name p1) | |
$(← compileParserDescr p2Name p2) | |
@[inline] def $root := $alias ↑$p1Name ↑$p2Name | |
) | |
| .node kind _prec p => do -- Only partial support | |
let pName := Name.str root "raw" | |
`( | |
$(← compileParserDescr pName p):command | |
@[inline] def $root := Parsec.node $(quote kind) ↑$pName | |
) | |
| .trailingNode kind _prec _lhsPrec p => do -- Only partial support | |
let pName := Name.str root "raw" | |
`( | |
$(← compileParserDescr pName p):command | |
@[inline] def $root := Parsec.node $(quote kind) ↑$pName | |
) | |
| .symbol val => | |
`(@[inline] def $root := Parsec.atom $(quote val.trim)) | |
| .nonReservedSymbol val _includeIdent => | |
`(@[inline] def $root := Parsec.atom $(quote val.trim)) | |
| .cat catName _rbp => do -- Only partial support | |
let some alias := aliases.find? catName | |
| Macro.throwError s!"no parser alias defined for category `{catName}`" | |
`(abbrev $root := $alias) | |
| .parser name => do | |
let some alias := aliases.find? name | |
| Macro.throwError s!"no parser alias defined for parser `{name}`" | |
`(abbrev $root := $alias) | |
| .nodeWithAntiquot _name kind p => do | |
let nName := compiledName kind | |
let pName := Name.str nName "raw" | |
let nName := `_root_ ++ nName | |
let mut cmds := #[] | |
let pName := `_root_ ++ pName | |
cmds := cmds.push <| ← `( | |
if_not_def $pName | |
$(← compileParserDescr pName p):command | |
def $nName := Parsec.node $(quote kind) ↑$pName | |
) | |
unless root == nName do | |
cmds := cmds.push <| ← `(abbrev $root := $nName) | |
return ⟨mkNullNode cmds⟩ | |
| .sepBy p _sep psep allowTrailingSep => do | |
let pName := Name.str root "elem" | |
let sName := Name.str root "sep" | |
`( | |
$(← compileParserDescr pName p):command | |
$(← compileParserDescr sName psep):command | |
@[inline] def $root := Parsec.sepBy ↑$pName ↑$sName $(quote allowTrailingSep) | |
) | |
| .sepBy1 p _sep psep allowTrailingSep => do | |
let pName := Name.str root "elem" | |
let sName := Name.str root "sep" | |
`( | |
$(← compileParserDescr pName p):command | |
$(← compileParserDescr sName psep):command | |
@[inline] def $root := Parsec.sepBy1 ↑$pName ↑$sName $(quote allowTrailingSep) | |
) | |
unsafe def unsafeEvalParserDescr | |
(env : Environment) (opts : Options) (name : Name) : Except String ParserDescr := | |
env.evalConstCheck ParserDescr opts ``ParserDescr name |>.run.run | |
@[implementedBy unsafeEvalParserDescr] opaque evalParserDescr | |
(env : Environment) (opts : Options) (name : Name) : Except String ParserDescr | |
elab_rules : command | `(compile_parser_descr $id) => do | |
let name ← resolveGlobalConstNoOverload id | |
let rootName := `_root_ ++ (compiledName name) | |
let descr ← IO.ofExcept <| evalParserDescr (← getEnv) (← getOptions) name | |
let cmd ← liftMacroM <| compileParserDescr rootName descr | |
withMacroExpansion (← getRef) cmd <| elabCommand cmd | |
-------------------------------------------------------------------------------- | |
-- # Tests | |
-------------------------------------------------------------------------------- | |
instance [ToString ε] [Eval α] : Eval (Except ε α) where | |
eval f hideUnit := | |
match f () with | |
| .ok a => Eval.eval (fun _ => a) hideUnit | |
| .error e => throw <| IO.userError <| toString e | |
instance : Eval Syntax where | |
eval f _ := IO.println <| f () | |
-- Simple example of a symbol (an atom) wrapped in a node | |
syntax ex1 := "foo" | |
#print ex1 | |
compile_parser_descr ex1 | |
#print ex1.parsec | |
#print ex1.parsec.raw | |
#eval ex1.parsec.run " foo " | |
-- Reference to a pre-defined (and already compiled) piece of syntax | |
syntax ex1i := ex1 | |
#print ex1i | |
compile_parser_descr ex1i | |
#eval ex1i.parsec.run " foo " | |
-- test that we properly compile syntax within a namespace | |
namespace foo | |
syntax ex1i := ex1 | |
#print ex1i | |
compile_parser_descr ex1i | |
#eval ex1i.parsec.run " foo " | |
end foo | |
/- | |
Example of the or-else combinator with a reference to | |
an as of yet compiled piece of syntax | |
-/ | |
syntax ex1b := "bar" | |
syntax exA := ex1 <|> ex1b | |
compile_parser_descr exA | |
#print exA.parsec.raw | |
#eval exA.parsec.run " foo " | |
#eval exA.parsec.run " bar " | |
-- Example of the and-then parser combinator | |
syntax ex2 := exA "baz" | |
compile_parser_descr ex2 | |
#print ex2.parsec.raw | |
#eval ex2.parsec.run " foo baz " | |
-- Note that we cannot use a real parser unless we have defined an alias for it | |
syntax exP := Command.infix | |
--compile_parser_descr exP -- errors, as expected | |
-- Example of using our own builtin aliases (i.e., `decimal`) | |
def decimal : ParserDescr := .const `decimal | |
syntax exSepDigit := "[" decimal,* "]" | |
#print exSepDigit | |
compile_parser_descr exSepDigit | |
#print exSepDigit.parsec.raw | |
#eval exSepDigit.parsec.run "[]" | |
--#eval exSepDigit.parsec.run "[0,] " | |
#eval exSepDigit.parsec.run " [0, 1]" | |
-- Demonstration of compiling builtin Lean syntax | |
compile_parser_descr explicitBinders | |
#check binderIdent.parsec.raw | |
#eval binderIdent.parsec.run "foo.bar" | |
#eval explicitBinders.parsec.run "(foo.bar : Nat) (_ : String)" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment