Last active
June 19, 2021 17:03
-
-
Save mrange/1986731a9e0986ca996e82e749831258 to your computer and use it in GitHub Desktop.
Parser Combinators 2 C#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#nullable enable | |
namespace CsParserCombinator | |
{ | |
using System; | |
using static Parsers; | |
class Program | |
{ | |
static readonly Parser<int> _algebraParser = CreateAlgebraParser(); | |
static Parser<int> CreateAlgebraParser() | |
{ | |
// In general, after each parsed unit we consume all whitespaces. | |
static Parser<Unit> Tk(char c) => Token(c).Left(Whitespaces); | |
static Parser<Func<int, int, int>> Op(char c, Func<int, int, int> f) => | |
Tk(c).Map0(f) | |
; | |
// Because the syntax is recursive we need a parser forwarder | |
var (expr, sexpr) = Forwarder<int>(); | |
var int_ = Int.Left(Whitespaces); | |
var term = int_.Or(expr.Between(Tk('('), Tk(')'))); | |
// Separating operators in "levels" is a trick to get | |
// * and / have higher precendence than + and - | |
var op0 = Op('*', (x,y) => x*y).Or(Op('/', (x,y) => x/y)); | |
var lvl0 = term.SepBy(op0); | |
var op1 = Op('+', (x,y) => x+y).Or(Op('-', (x,y) => x-y)); | |
var lvl1 = lvl0.SepBy(op1); | |
// Update the forwarder to support recursive syntax | |
sexpr(lvl1); | |
return Whitespaces.Right(lvl1).Left(EOF); | |
} | |
static void Algebra() | |
{ | |
var input = " 4*2 + 3*( 3 - 4 ) "; | |
var result = _algebraParser.Run(input); | |
if (result != null) | |
{ | |
Console.WriteLine($"{input} => {result}"); | |
} | |
else | |
{ | |
Console.WriteLine($"{input}, can't be parsed"); | |
} | |
} | |
static void Simple() | |
{ | |
const string Lines = @" | |
x=3 | |
xyz = 45 | |
"; | |
var id = Letter.ManyChar(atLeast: 1); | |
var line = Whitespaces | |
.Right(id) | |
.Left(Whitespaces) | |
.Left(Token('=')) | |
.Left(Whitespaces) | |
.Pair(Int) | |
.Left(Whitespaces) | |
; | |
var lines = line.Many(); | |
var full = lines.Left(EOF); | |
var result = full.Run(Lines); | |
if (result != null) | |
{ | |
Console.WriteLine("Parsing successful"); | |
foreach (var (k, v) in result) | |
{ | |
Console.WriteLine($"K:{k}, V:{v}"); | |
} | |
} | |
else | |
{ | |
Console.WriteLine("Parsing failed"); | |
} | |
Console.WriteLine($"{result}"); | |
} | |
static void Main(string[] args) | |
{ | |
Algebra(); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#nullable enable | |
namespace CsParserCombinator | |
{ | |
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.Text; | |
class Unit | |
{ | |
private Unit() {} | |
public static readonly Unit Value = new Unit(); | |
} | |
delegate (T, int)? Parser<T>(string s, int i); | |
static class Parsers | |
{ | |
public static T? Run<T>(this Parser<T> t, string s) => | |
t(s, 0) switch | |
{ | |
null => default | |
, var (tv, _) => tv | |
}; | |
public static Parser<T> Return<T>(T v) => | |
(s, i) => (v, i) | |
; | |
public static Parser<T> Fail<T>() => | |
(s, i) => null | |
; | |
public static Parser<U> Bind<T, U>(this Parser<T> t, Func<T, Parser<U>> uf) => | |
(s, i) => | |
t(s, i) switch | |
{ | |
null => null | |
, var (tv, ti) => uf(tv)(s, ti) | |
}; | |
public static Parser<T> Or<T>(this Parser<T> t, Parser<T> u) => | |
(s, i) => | |
t(s, i) switch | |
{ | |
null => u(s, i) | |
, var tr => tr | |
}; | |
public static Parser<U> Map<T, U>(this Parser<T> t, Func<T, U> m) => | |
t.Bind(tv => Return(m(tv))) | |
; | |
public static Parser<U> Map0<T, U>(this Parser<T> t, U u) => | |
t.Bind(tv => Return(u)) | |
; | |
public static Parser<(T, U)> Pair<T, U>(this Parser<T> t, Parser<U> u) => | |
t.Bind(tv => u.Bind(uv => Return((tv, uv)))) | |
; | |
public static Parser<T> Left<T, U>(this Parser<T> t, Parser<U> u) => | |
t.Bind(tv => u.Bind(_ => Return(tv))) | |
; | |
public static Parser<U> Right<T, U>(this Parser<T> t, Parser<U> u) => | |
t.Bind(_ => u.Bind(uv => Return(uv))) | |
; | |
public static Parser<T> Between<T, F, E>(this Parser<T> t, Parser<F> f, Parser<E> e) => | |
f.Right(t).Left(e) | |
; | |
public readonly static Parser<Unit> EOF = | |
(s, i) => i < s.Length ? null : (Unit.Value, i) | |
; | |
public readonly static Parser<char> AnyChar = | |
(s, i) => i < s.Length ? (s[i], i + 1) : null | |
; | |
public static Parser<Unit> Token(char token) => | |
// Could be expressed as AnyChar.Bind but this is an optimization | |
(s, i) => i < s.Length && s[i] == token ? (Unit.Value, i + 1) : null | |
; | |
public static Parser<char> Satisfy(Func<char, bool> test) => | |
// Could be expressed as AnyChar.Bind but this is an optimization | |
(s, i) => i < s.Length && test(s[i]) ? (s[i], i + 1) : null | |
; | |
public static Parser<Unit> Token(string token) => | |
(s, i) => | |
s.Length - i >= token.Length && s.AsSpan().Slice(i, token.Length).Equals(token.AsSpan(), StringComparison.Ordinal) | |
? (Unit.Value, i + token.Length) | |
: null | |
; | |
public readonly static Parser<char> Digit = Satisfy(Char.IsDigit); | |
public readonly static Parser<char> Letter = Satisfy(Char.IsLetter); | |
public readonly static Parser<char> Whitespace = Satisfy(Char.IsWhiteSpace); | |
public static Parser<T> SepBy<T>(this Parser<T> t, Parser<Func<T, T, T>> sep) => | |
(s, i) => | |
{ | |
switch (t(s, i)) | |
{ | |
case null: | |
return null; | |
case var (tv, ti): | |
var result = tv; | |
var current = ti; | |
while(true) | |
{ | |
switch(sep(s, current)) | |
{ | |
case null: | |
return (result, current); | |
case var (sv, si): | |
switch(t(s, si)) | |
{ | |
case null: | |
return null; | |
case var (ttv, tti): | |
result = sv(result, ttv); | |
current = tti; | |
break; | |
}; | |
break; | |
} | |
}; | |
// Will never be reached | |
return (result, current); | |
}; | |
}; | |
public static Parser<T[]> Many<T>(this Parser<T> t, int atLeast = 0, int atMost = int.MaxValue) => | |
(s, i) => | |
{ | |
var result = new List<T>(Math.Max(atLeast, 16)); | |
var current = i; | |
while(result.Count < atMost) | |
{ | |
switch(t(s, current)) | |
{ | |
case null: | |
// Give C# tail recursion optimization now | |
goto done; | |
case var (tv, ti): | |
result.Add(tv); | |
current = ti; | |
break; | |
}; | |
} | |
done: | |
return result.Count >= atLeast | |
? (result.ToArray(), current) | |
: null | |
; | |
}; | |
public static Parser<string> ManyChar(this Parser<char> t, int atLeast = 0, int atMost = int.MaxValue) => | |
(s, i) => | |
{ | |
var result = new StringBuilder(Math.Max(atLeast, 16)); | |
var current = i; | |
while(result.Length < atMost) | |
{ | |
switch(t(s, current)) | |
{ | |
case null: | |
// Give C# tail recursion optimization now | |
goto done; | |
case var (tv, ti): | |
result.Append(tv); | |
current = ti; | |
break; | |
}; | |
} | |
done: | |
return result.Length >= atLeast | |
? (result.ToString(), current) | |
: null | |
; | |
}; | |
public static Parser<Unit> SkipMany<T>(this Parser<T> t, int atLeast = 0, int atMost = int.MaxValue) => | |
(s, i) => | |
{ | |
var result = 0; | |
var current = i; | |
while(result < atMost) | |
{ | |
switch(t(s, current)) | |
{ | |
case null: | |
// Give C# tail recursion optimization now | |
goto done; | |
case var (tv, ti): | |
++result; | |
current = ti; | |
break; | |
}; | |
} | |
done: | |
return result >= atLeast | |
? (Unit.Value, current) | |
: null | |
; | |
}; | |
public readonly static Parser<Unit> Whitespaces = Whitespace.SkipMany(); | |
public readonly static Parser<Unit> Whitespaces1 = Whitespace.SkipMany(atLeast: 1); | |
public readonly static Parser<int> Int = Digit | |
.ManyChar(atLeast: 1) | |
.Map(cs => int.Parse(cs)); | |
public static (Parser<T>, Action<Parser<T>>) Forwarder<T>() | |
{ | |
var fp = Fail<T>(); | |
Parser<T> p = (s, i) => fp(s, i); | |
Action<Parser<T>> s = sp => { fp = sp; }; | |
return (p, s); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment