Skip to content

Instantly share code, notes, and snippets.

| NonTerminal x ->
let rule = grammar.[x]
match parse offset rule.Prod with
| (Unmatched, _) as y -> y
| (parsed, endOffset) -> (rule.Func input.[offset..endOffset - 1] parsed, endOffset)
// ...
parse 0 <| NonTerminal start
// bool <- "true" / "false"
let boolRule = GrammarRule<bool>(Choice [Terminal "true"; Terminal "false"], (fun s _ -> Parsed (s = "true")))
// paren <- "(" expr ")"
let parseParen _ = function
| Production [TerminalSymbol "("; x; TerminalSymbol ")"] -> x
| x -> unexpected x
let parenRule = GrammarRule<bool>(Sequence [Terminal "("; NonTerminal "expr"; Terminal ")"], parseParen)
// not <- "!" atom
let g = Map.ofList [("bool", boolRule); ("paren", parenRule); ("not", notRule); ("atom", atomRule);
("and", andRule); ("or", orRule); ("expr", exprRule); ("start", startRule)]
type BooleanExpr =
| Atom of bool
| Binary of BooleanExpr * string * BooleanExpr
| Not of BooleanExpr
let rec evalBool = function
| Atom x -> x
| Binary (left, "&", right) -> if evalBool left then evalBool right else false
| Binary (left, "|", right) -> if evalBool left then true else evalBool right
| Binary (_, x, _) -> failwith <| "Unexpected binary operator: " + x
hexDigit <- [0123456789abcdefABCDEF]
escapedCharacter <- '\\' ([abfnrtv0\\"'[\]] / ('u' hexDigit hexDigit hexDigit hexDigit))
safeCharacter <- escapedCharacter / (!["\\\n] <anychar>)
oneofCharacter <- escapedCharacter / (![\\\]] <anychar>)
terminalUnicode <- '{' ( "Lu" / "Ll" / "Lt" / "Lm" / "Lo" / "Mn" / "Mc" / "Me" / "Nd" / "Nl" / "No" / "Pc" / "Pd" /
"Ps" / "Pe" / "Pi" / "Pf" / "Po" / "Sm" / "Sc" / "Sk" / "So" / "Zs" / "Zl" / "Zp" / "Cc" /
"Cf" / "Cs" / "Co" / "Cn" ) '}'
terminalOneOf <- '[' oneofCharacter+ ']'
terminalCharacter <- '\'' safeCharacter '\''
terminalWord <- '\"' safeCharacter+ '\"'
type Expression =
| Terminal of string
| TerminalOneOf of string
| TerminalUnicode of UnicodeCategory
| TerminalWildcard
| NonTerminal of string
| Epsilon
| Sequence of Expression list
| Choice of Expression list
| ZeroOrMore of Expression
let currentCharacter (s:string) i =
if i >= s.Length then None
else if Char.IsSurrogatePair(s, i) then Some s.[i..i+1] else Some s.[i..i]
let parseExpression (grammar:Map<string,GrammarRule<'a>>) start (input:string) =
let rec parse offset = function
| Terminal x ->
let e = offset + x.Length - 1
if e < input.Length && input.[offset..e] = x then (TerminalSymbol x, e + 1) else (Unmatched, offset)
| TerminalOneOf x -> match currentCharacter input offset with
| Some c -> if x.Contains(c) then (TerminalSymbol c, offset + c.Length) else (Unmatched, offset)
| None -> (Unmatched, offset)
| TerminalWildcard -> match currentCharacter input offset with
| Some c -> (TerminalSymbol c, offset + c.Length)
// unary <- ([!&] unary) / (atom [*+?])?
let parseUnary _ = function
| Production [TerminalSymbol "!"; Parsed x] -> Parsed <| Not x
| Production [TerminalSymbol "&"; Parsed x] -> Parsed <| And x
| Production [Parsed x; TerminalSymbol "*"] -> Parsed <| ZeroOrMore x
| Production [Parsed x; TerminalSymbol "+"] -> Parsed <| OneOrMore x
| Production [Parsed x; TerminalSymbol "?"] -> Parsed <| Optional x
| Production [Parsed _ as x; EmptyMatch] -> x
| x -> unexpected x
let unaryRule = GrammarRule<Expression>(Choice [Sequence [TerminalOneOf "!&"; NonTerminal "unary"];
let rec codeGen (b:StringBuilder) i = function
| Terminal x -> ibprintf b i """matchTerminal "%s" input offset""" <| escape x
| TerminalOneOf x -> ibprintf b i """matchTerminalOneOf "%s" input offset""" <| escape x
| TerminalWildcard -> ibprintf b i """matchTerminalWildcard input offset"""
| TerminalUnicode x -> ibprintf b i """matchTerminalUnicode System.Globalization.UnicodeCategory.%s input offset""" <| string(x)
| Epsilon -> ibprintf b i """if offset = input.Length then (EmptyMatch, offset) else (Unmatched, offset)"""
| NonTerminal x -> ibprintf b i """matchRule%s input offset""" <| capitalIdentifier x