Skip to content

Instantly share code, notes, and snippets.

@tydeu
Last active July 25, 2022 04:52
Show Gist options
  • Save tydeu/718aa65f8a36b9d9e1ab210a66c81b29 to your computer and use it in GitHub Desktop.
Save tydeu/718aa65f8a36b9d9e1ab210a66c81b29 to your computer and use it in GitHub Desktop.
Lean 4 example of compiling a ParserDescr to a Parsec parser
/-
Copyright (c) 2021 Mac Malone. All rights reserved.
Released under the MIT license.
Authors: Mac Malone
-/
import Lean.Data.Parsec
import Lean.Elab.ElabRules
import Lean.Parser
open Lean Syntax Parser Elab Command Parsec
--------------------------------------------------------------------------------
-- # General Parsec Helpers
--------------------------------------------------------------------------------
instance : MonadReaderOf String.Iterator Parsec where
read := fun it => .success it it
@[inline] def Lean.Parsec.run (s : String) (p : Parsec α) : Except String α :=
match p s.mkIterator with
| .success _ a => .ok a
| .error _ e => .error e
@[inline] def Parsec.option (p : Parsec α) : Parsec (Option α) := fun it =>
match p it with
| .success it a => .success it (some a)
| .error _ _ => .success it none
@[inline] def Parsec.matched (p : Parsec α) : Parsec Bool := fun it =>
match p it with
| .success it _ => .success it true
| .error _ _ => .success it false
@[inline] def Parsec.withString (p : Parsec α) : Parsec (String × α) := fun it =>
match p it with
| .success it' a => .success it' (it.extract it', a)
| .error it' e => .error it' e
@[inline] def Parsec.extract (p : Parsec PUnit) : Parsec String :=
(·.1) <$> withString p
@[inline] def Parsec.withSubstring (p : Parsec α) : Parsec (Substring × α) := fun it =>
match p it with
| .success it' a => .success it' <|
(it.toString.toSubstring.extract it.pos it'.pos, a)
| .error it' e => .error it' e
@[inline] def Parsec.extractSub (p : Parsec PUnit) : Parsec Substring :=
(·.1) <$> withSubstring p
@[macroInline] def Parsec.orelse (p1 p2 : Parsec α) : Parsec α :=
(attempt p1) <|> p2
@[macroInline] partial def Parsec.skipSatisfy
(pred : Char → Bool) (errMsg := "condition not satisfied") : Parsec PUnit := do
unless pred (← anyChar) do fail errMsg
@[inline] partial def Parsec.skipMany (p : Parsec α) : Parsec PUnit := do
if (← matched p) then skipMany p else pure ()
@[macroInline] def Parsec.skipMany1 (p : Parsec α) : Parsec PUnit :=
discard p *> skipMany p
@[inline] def bit : Parsec Char := attempt do
let c ← anyChar; if c = '0' ∨ c ≤ '1' then return c else fail s!"bit expected"
@[inline] def octDigit : Parsec Char := attempt do
let c ← anyChar; if '0' ≤ c ∧ c ≤ '7' then return c else fail s!"octal digit expected"
--------------------------------------------------------------------------------
-- # Syntax-specific Parsec Helpers
--------------------------------------------------------------------------------
instance : Coe (Parsec (TSyntax k)) (Parsec Syntax) where
coe x := x
instance : Coe (Parsec Syntax) (Parsec (Array Syntax)) where
coe x := Array.singleton <$> x
instance : Coe (Parsec (Array (TSyntax k))) (Parsec (Array Syntax)) where
coe x := x
def Parsec.withWsInfo (p : Parsec α) : Parsec (SourceInfo × α) := do
let leading ← extractSub ws
let start ← read
let a ← p
let stop ← read
let trailing ← extractSub ws
let info := .original leading start.pos trailing stop.pos
return (info, a)
def Parsec.atomicId : Parsec String := do
if (← matched <| satisfy isIdBeginEscape) then
extract do repeat if (← matched <| satisfy isIdEndEscape) then break
else
extract do
skipSatisfy isIdFirst
skipMany <| skipSatisfy isIdRest
def Parsec.name : Parsec Name := do
let mut n := Name.anonymous
repeat
let s ← atomicId
n := Name.mkStr n s
until !(← matched <| skipChar '.')
return n
def Parsec.ident : Parsec Ident := do
let (info, rawVal, val) ← withWsInfo <| withSubstring <| name
return ⟨.ident info rawVal val []⟩
def Parsec.atomOf (p : Parsec PUnit) : Parsec Syntax := do
let (info, val) ← withWsInfo <| extract p
return .atom info val
abbrev TAtom (val : String) := TSyntax (Name.str .anonymous val)
def Parsec.atom (val : String) : Parsec (TAtom val) :=
(⟨·⟩) <$> atomOf (skipString val)
def Parsec.decimal : Parsec Syntax := atomOf do
skipMany digit
def Parsec.node (kind : SyntaxNodeKind) (p : Parsec (Array Syntax)) : Parsec (TSyntax kind) := do
return ⟨.node SourceInfo.none kind (← p)⟩
def Parsec.num : Parsec NumLit :=
node numLitKind <| Array.singleton <$> atomOf do
let c ← satisfy fun c => '0' ≤ c ∧ c ≤ '9'
if c = '0' then
match (← anyChar) with
| 'b' => skipMany1 hexDigit
| 'o' => skipMany1 octDigit
| 'x' => skipMany1 bit
| _ => fail "numeral expected"
else
skipMany digit
def Parsec.str : Parsec StrLit :=
node strLitKind <| Array.singleton <$> atomOf do
skipChar '"'
repeat
match (← anyChar) with
| '\\' => skip
| '"' => break
| _ => continue
class ParsecOrElse (α : Type) (β : Type) (γ : outParam Type) where
pOrElse : Parsec α → Parsec β → Parsec γ
instance : ParsecOrElse Syntax Syntax Syntax where
pOrElse p1 p2 := Parsec.orelse p1 p2
instance : Append SyntaxNodeKinds := inferInstanceAs (Append (List SyntaxNodeKind))
instance : ParsecOrElse (TSyntax k1) (TSyntax k2) (TSyntax (k1 ++ k2)) where
pOrElse p1 p2 := Parsec.orelse ((⟨·.raw⟩) <$> p1) ((⟨·.raw⟩) <$> p2)
instance : ParsecOrElse (Array (TSyntax k1)) (TSyntax k2) (Array (TSyntax (k1 ++ k2))) where
pOrElse p1 p2 := Parsec.orelse
((·.map (⟨·.raw⟩)) <$> p1)
((fun x => Array.singleton ⟨x.raw⟩) <$> p2)
instance : ParsecOrElse (TSyntax k1) (Array (TSyntax k2)) (Array (TSyntax (k1 ++ k2))) where
pOrElse p1 p2 := Parsec.orelse
((fun x => Array.singleton ⟨x.raw⟩) <$> p1)
((·.map (⟨·.raw⟩)) <$> p2)
def Parsec.andthen (p1 p2 : Parsec (Array Syntax)) : Parsec (Array Syntax) := do
let a1 ← p1
let a2 ← p2
return a1 ++ a2
def Parsec.optional (p : Parsec (Array Syntax)) : Parsec Syntax := fun it =>
match p it with
| .success it args => .success it (mkNullNode args)
| .error _ _ => .success it mkNullNode
def Parsec.sepByTrailing (initArgs : Array Syntax)
(p : Parsec Syntax) (sep : Parsec Syntax) : Parsec Syntax := do
let mut args := initArgs
repeat if let some a ← option p then
args := args.push a
if let some s ← option sep then
args := args.push s
else
break
return mkNullNode args
def Parsec.sepBy1NoTrailing (initArgs : Array Syntax)
(p : Parsec Syntax) (sep : Parsec Syntax) : Parsec Syntax := do
let mut args := initArgs
repeat
let a ← p
args := args.push a
if let some s ← option sep then
args := args.push s
else
break
return mkNullNode args
def Parsec.sepBy (p : Parsec Syntax) (sep : Parsec Syntax) (allowTrailingSep : Bool) : Parsec Syntax := do
if allowTrailingSep then
sepByTrailing #[] p sep
else if let some a ← option p then
if let some s ← option sep then
sepBy1NoTrailing #[a, s] p sep
else
return mkNullNode #[a]
else
return mkNullNode
def Parsec.sepBy1 (p : Parsec Syntax) (sep : Parsec Syntax) (allowTrailingSep : Bool) : Parsec Syntax := do
let a ← p
if let some s ← option sep then
if allowTrailingSep then
sepByTrailing #[a, s] p sep
else
return mkNullNode #[a]
else
sepBy1NoTrailing #[] p sep
-- Just for testing purposes
def Parsec.term : Parsec Syntax := do
ident <|> num <|> str
--------------------------------------------------------------------------------
-- # Compilation Command
--------------------------------------------------------------------------------
def aliases :=
({} : NameMap Name)
|>.insert `orelse ``ParsecOrElse.pOrElse
|>.insert `andthen ``Parsec.andthen
|>.insert `optional ``Parsec.optional
|>.insert `many ``Parsec.many
|>.insert `many1 ``Parsec.many1
|>.insert `num ``Parsec.num
|>.insert `ident ``Parsec.ident
|>.insert `term ``Parsec.term
|>.insert `decimal ``Parsec.decimal
def compiledName (p : Name) : Name :=
.str p "parsec"
instance : Coe Name Ident where
coe := mkIdent
elab "if_not_def " id:ident cmds:many1Indent(command) : command => do
let id := id.getId.replacePrefix rootNamespace Name.anonymous
unless (← getEnv).contains id do
withMacroExpansion (← getRef) cmds <| elabCommand cmds
syntax "compile_parser_descr " ident : command
def compileParserDescr (root : Name) : ParserDescr → MacroM Command
| .const name => do
let some alias := aliases.find? name
| Macro.throwError s!"no parser alias defined for alias `{name}`"
`(abbrev $root := $alias)
| .unary name p => do
let some alias := aliases.find? name
| Macro.throwError s!"no parser alias defined for alias `{name}`"
let pName := Name.str root "op"
`(
$(← compileParserDescr pName p)
@[inline] def $root := $alias ↑$pName
)
| .binary name p1 p2 => do
let some alias := aliases.find? name
| Macro.throwError s!"no parser alias defined for alias `{name}`"
let p1Name := Name.str root "lhs"
let p2Name := Name.str root "rhs"
`(
$(← compileParserDescr p1Name p1)
$(← compileParserDescr p2Name p2)
@[inline] def $root := $alias ↑$p1Name ↑$p2Name
)
| .node kind _prec p => do -- Only partial support
let pName := Name.str root "raw"
`(
$(← compileParserDescr pName p):command
@[inline] def $root := Parsec.node $(quote kind) ↑$pName
)
| .trailingNode kind _prec _lhsPrec p => do -- Only partial support
let pName := Name.str root "raw"
`(
$(← compileParserDescr pName p):command
@[inline] def $root := Parsec.node $(quote kind) ↑$pName
)
| .symbol val =>
`(@[inline] def $root := Parsec.atom $(quote val.trim))
| .nonReservedSymbol val _includeIdent =>
`(@[inline] def $root := Parsec.atom $(quote val.trim))
| .cat catName _rbp => do -- Only partial support
let some alias := aliases.find? catName
| Macro.throwError s!"no parser alias defined for category `{catName}`"
`(abbrev $root := $alias)
| .parser name => do
let some alias := aliases.find? name
| Macro.throwError s!"no parser alias defined for parser `{name}`"
`(abbrev $root := $alias)
| .nodeWithAntiquot _name kind p => do
let nName := compiledName kind
let pName := Name.str nName "raw"
let nName := `_root_ ++ nName
let mut cmds := #[]
let pName := `_root_ ++ pName
cmds := cmds.push <| ← `(
if_not_def $pName
$(← compileParserDescr pName p):command
def $nName := Parsec.node $(quote kind) ↑$pName
)
unless root == nName do
cmds := cmds.push <| ← `(abbrev $root := $nName)
return ⟨mkNullNode cmds⟩
| .sepBy p _sep psep allowTrailingSep => do
let pName := Name.str root "elem"
let sName := Name.str root "sep"
`(
$(← compileParserDescr pName p):command
$(← compileParserDescr sName psep):command
@[inline] def $root := Parsec.sepBy ↑$pName ↑$sName $(quote allowTrailingSep)
)
| .sepBy1 p _sep psep allowTrailingSep => do
let pName := Name.str root "elem"
let sName := Name.str root "sep"
`(
$(← compileParserDescr pName p):command
$(← compileParserDescr sName psep):command
@[inline] def $root := Parsec.sepBy1 ↑$pName ↑$sName $(quote allowTrailingSep)
)
unsafe def unsafeEvalParserDescr
(env : Environment) (opts : Options) (name : Name) : Except String ParserDescr :=
env.evalConstCheck ParserDescr opts ``ParserDescr name |>.run.run
@[implementedBy unsafeEvalParserDescr] opaque evalParserDescr
(env : Environment) (opts : Options) (name : Name) : Except String ParserDescr
elab_rules : command | `(compile_parser_descr $id) => do
let name ← resolveGlobalConstNoOverload id
let rootName := `_root_ ++ (compiledName name)
let descr ← IO.ofExcept <| evalParserDescr (← getEnv) (← getOptions) name
let cmd ← liftMacroM <| compileParserDescr rootName descr
withMacroExpansion (← getRef) cmd <| elabCommand cmd
--------------------------------------------------------------------------------
-- # Tests
--------------------------------------------------------------------------------
instance [ToString ε] [Eval α] : Eval (Except ε α) where
eval f hideUnit :=
match f () with
| .ok a => Eval.eval (fun _ => a) hideUnit
| .error e => throw <| IO.userError <| toString e
instance : Eval Syntax where
eval f _ := IO.println <| f ()
-- Simple example of a symbol (an atom) wrapped in a node
syntax ex1 := "foo"
#print ex1
compile_parser_descr ex1
#print ex1.parsec
#print ex1.parsec.raw
#eval ex1.parsec.run " foo "
-- Reference to a pre-defined (and already compiled) piece of syntax
syntax ex1i := ex1
#print ex1i
compile_parser_descr ex1i
#eval ex1i.parsec.run " foo "
-- test that we properly compile syntax within a namespace
namespace foo
syntax ex1i := ex1
#print ex1i
compile_parser_descr ex1i
#eval ex1i.parsec.run " foo "
end foo
/-
Example of the or-else combinator with a reference to
an as of yet compiled piece of syntax
-/
syntax ex1b := "bar"
syntax exA := ex1 <|> ex1b
compile_parser_descr exA
#print exA.parsec.raw
#eval exA.parsec.run " foo "
#eval exA.parsec.run " bar "
-- Example of the and-then parser combinator
syntax ex2 := exA "baz"
compile_parser_descr ex2
#print ex2.parsec.raw
#eval ex2.parsec.run " foo baz "
-- Note that we cannot use a real parser unless we have defined an alias for it
syntax exP := Command.infix
--compile_parser_descr exP -- errors, as expected
-- Example of using our own builtin aliases (i.e., `decimal`)
def decimal : ParserDescr := .const `decimal
syntax exSepDigit := "[" decimal,* "]"
#print exSepDigit
compile_parser_descr exSepDigit
#print exSepDigit.parsec.raw
#eval exSepDigit.parsec.run "[]"
--#eval exSepDigit.parsec.run "[0,] "
#eval exSepDigit.parsec.run " [0, 1]"
-- Demonstration of compiling builtin Lean syntax
compile_parser_descr explicitBinders
#check binderIdent.parsec.raw
#eval binderIdent.parsec.run "foo.bar"
#eval explicitBinders.parsec.run "(foo.bar : Nat) (_ : String)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment