Skip to content

Instantly share code, notes, and snippets.

@mrange
Last active June 19, 2021 17:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrange/1986731a9e0986ca996e82e749831258 to your computer and use it in GitHub Desktop.
Save mrange/1986731a9e0986ca996e82e749831258 to your computer and use it in GitHub Desktop.
Parser Combinators 2 C#
#nullable enable
namespace CsParserCombinator
{
using System;
using static Parsers;
class Program
{
static readonly Parser<int> _algebraParser = CreateAlgebraParser();
static Parser<int> CreateAlgebraParser()
{
// In general, after each parsed unit we consume all whitespaces.
static Parser<Unit> Tk(char c) => Token(c).Left(Whitespaces);
static Parser<Func<int, int, int>> Op(char c, Func<int, int, int> f) =>
Tk(c).Map0(f)
;
// Because the syntax is recursive we need a parser forwarder
var (expr, sexpr) = Forwarder<int>();
var int_ = Int.Left(Whitespaces);
var term = int_.Or(expr.Between(Tk('('), Tk(')')));
// Separating operators in "levels" is a trick to get
// * and / have higher precendence than + and -
var op0 = Op('*', (x,y) => x*y).Or(Op('/', (x,y) => x/y));
var lvl0 = term.SepBy(op0);
var op1 = Op('+', (x,y) => x+y).Or(Op('-', (x,y) => x-y));
var lvl1 = lvl0.SepBy(op1);
// Update the forwarder to support recursive syntax
sexpr(lvl1);
return Whitespaces.Right(lvl1).Left(EOF);
}
static void Algebra()
{
var input = " 4*2 + 3*( 3 - 4 ) ";
var result = _algebraParser.Run(input);
if (result != null)
{
Console.WriteLine($"{input} => {result}");
}
else
{
Console.WriteLine($"{input}, can't be parsed");
}
}
static void Simple()
{
const string Lines = @"
x=3
xyz = 45
";
var id = Letter.ManyChar(atLeast: 1);
var line = Whitespaces
.Right(id)
.Left(Whitespaces)
.Left(Token('='))
.Left(Whitespaces)
.Pair(Int)
.Left(Whitespaces)
;
var lines = line.Many();
var full = lines.Left(EOF);
var result = full.Run(Lines);
if (result != null)
{
Console.WriteLine("Parsing successful");
foreach (var (k, v) in result)
{
Console.WriteLine($"K:{k}, V:{v}");
}
}
else
{
Console.WriteLine("Parsing failed");
}
Console.WriteLine($"{result}");
}
static void Main(string[] args)
{
Algebra();
}
}
}
#nullable enable
namespace CsParserCombinator
{
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
class Unit
{
private Unit() {}
public static readonly Unit Value = new Unit();
}
delegate (T, int)? Parser<T>(string s, int i);
static class Parsers
{
public static T? Run<T>(this Parser<T> t, string s) =>
t(s, 0) switch
{
null => default
, var (tv, _) => tv
};
public static Parser<T> Return<T>(T v) =>
(s, i) => (v, i)
;
public static Parser<T> Fail<T>() =>
(s, i) => null
;
public static Parser<U> Bind<T, U>(this Parser<T> t, Func<T, Parser<U>> uf) =>
(s, i) =>
t(s, i) switch
{
null => null
, var (tv, ti) => uf(tv)(s, ti)
};
public static Parser<T> Or<T>(this Parser<T> t, Parser<T> u) =>
(s, i) =>
t(s, i) switch
{
null => u(s, i)
, var tr => tr
};
public static Parser<U> Map<T, U>(this Parser<T> t, Func<T, U> m) =>
t.Bind(tv => Return(m(tv)))
;
public static Parser<U> Map0<T, U>(this Parser<T> t, U u) =>
t.Bind(tv => Return(u))
;
public static Parser<(T, U)> Pair<T, U>(this Parser<T> t, Parser<U> u) =>
t.Bind(tv => u.Bind(uv => Return((tv, uv))))
;
public static Parser<T> Left<T, U>(this Parser<T> t, Parser<U> u) =>
t.Bind(tv => u.Bind(_ => Return(tv)))
;
public static Parser<U> Right<T, U>(this Parser<T> t, Parser<U> u) =>
t.Bind(_ => u.Bind(uv => Return(uv)))
;
public static Parser<T> Between<T, F, E>(this Parser<T> t, Parser<F> f, Parser<E> e) =>
f.Right(t).Left(e)
;
public readonly static Parser<Unit> EOF =
(s, i) => i < s.Length ? null : (Unit.Value, i)
;
public readonly static Parser<char> AnyChar =
(s, i) => i < s.Length ? (s[i], i + 1) : null
;
public static Parser<Unit> Token(char token) =>
// Could be expressed as AnyChar.Bind but this is an optimization
(s, i) => i < s.Length && s[i] == token ? (Unit.Value, i + 1) : null
;
public static Parser<char> Satisfy(Func<char, bool> test) =>
// Could be expressed as AnyChar.Bind but this is an optimization
(s, i) => i < s.Length && test(s[i]) ? (s[i], i + 1) : null
;
public static Parser<Unit> Token(string token) =>
(s, i) =>
s.Length - i >= token.Length && s.AsSpan().Slice(i, token.Length).Equals(token.AsSpan(), StringComparison.Ordinal)
? (Unit.Value, i + token.Length)
: null
;
public readonly static Parser<char> Digit = Satisfy(Char.IsDigit);
public readonly static Parser<char> Letter = Satisfy(Char.IsLetter);
public readonly static Parser<char> Whitespace = Satisfy(Char.IsWhiteSpace);
public static Parser<T> SepBy<T>(this Parser<T> t, Parser<Func<T, T, T>> sep) =>
(s, i) =>
{
switch (t(s, i))
{
case null:
return null;
case var (tv, ti):
var result = tv;
var current = ti;
while(true)
{
switch(sep(s, current))
{
case null:
return (result, current);
case var (sv, si):
switch(t(s, si))
{
case null:
return null;
case var (ttv, tti):
result = sv(result, ttv);
current = tti;
break;
};
break;
}
};
// Will never be reached
return (result, current);
};
};
public static Parser<T[]> Many<T>(this Parser<T> t, int atLeast = 0, int atMost = int.MaxValue) =>
(s, i) =>
{
var result = new List<T>(Math.Max(atLeast, 16));
var current = i;
while(result.Count < atMost)
{
switch(t(s, current))
{
case null:
// Give C# tail recursion optimization now
goto done;
case var (tv, ti):
result.Add(tv);
current = ti;
break;
};
}
done:
return result.Count >= atLeast
? (result.ToArray(), current)
: null
;
};
public static Parser<string> ManyChar(this Parser<char> t, int atLeast = 0, int atMost = int.MaxValue) =>
(s, i) =>
{
var result = new StringBuilder(Math.Max(atLeast, 16));
var current = i;
while(result.Length < atMost)
{
switch(t(s, current))
{
case null:
// Give C# tail recursion optimization now
goto done;
case var (tv, ti):
result.Append(tv);
current = ti;
break;
};
}
done:
return result.Length >= atLeast
? (result.ToString(), current)
: null
;
};
public static Parser<Unit> SkipMany<T>(this Parser<T> t, int atLeast = 0, int atMost = int.MaxValue) =>
(s, i) =>
{
var result = 0;
var current = i;
while(result < atMost)
{
switch(t(s, current))
{
case null:
// Give C# tail recursion optimization now
goto done;
case var (tv, ti):
++result;
current = ti;
break;
};
}
done:
return result >= atLeast
? (Unit.Value, current)
: null
;
};
public readonly static Parser<Unit> Whitespaces = Whitespace.SkipMany();
public readonly static Parser<Unit> Whitespaces1 = Whitespace.SkipMany(atLeast: 1);
public readonly static Parser<int> Int = Digit
.ManyChar(atLeast: 1)
.Map(cs => int.Parse(cs));
public static (Parser<T>, Action<Parser<T>>) Forwarder<T>()
{
var fp = Fail<T>();
Parser<T> p = (s, i) => fp(s, i);
Action<Parser<T>> s = sp => { fp = sp; };
return (p, s);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment