Skip to content

Instantly share code, notes, and snippets.

@deanebarker
Last active June 1, 2024 18:13
Show Gist options
  • Save deanebarker/d2af243d053c3ac6b688a1b82ff378c2 to your computer and use it in GitHub Desktop.
Save deanebarker/d2af243d053c3ac6b688a1b82ff378c2 to your computer and use it in GitHub Desktop.
A very simple parser combinator
void Main()
{
    // Input the demo grammar is run against
    var text = new WorkingText("Annie1aa2");

    // Grammar: "Deane" or "Annie", followed by a digit, then two letters, then another digit
    var deane = new TextParser("Deane");
    var annie = new TextParser("Annie");
    var name = new Any(deane, annie);

    var parser = new Sequence(
        name,
        new AnyNumberParser(),
        new AnyLetterParser(),
        new AnyLetterParser(),
        new AnyNumberParser());

    // Matched text is accumulated here as the parsers succeed
    var results = new StringBuilder();
    parser.Parse(ref text, ref results);

    // Dump() is LINQPad's output helper
    results.Dump();

    // results: Annie1aa2
    // textPosition: 9
}
/// <summary>
/// Contract shared by every parser in this file, primitive parsers and combinators alike.
/// </summary>
public interface IParser
{
    /// <summary>
    /// Attempts to match the input at the current cursor position of <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The input string together with its cursor position.</param>
    /// <param name="sb">Builder that the implementations in this file append matched text to.</param>
    /// <returns><c>true</c> when the parser matched; <c>false</c> otherwise.</returns>
    bool Parse(ref WorkingText text, ref StringBuilder sb);
}
// Matches one fixed, literal string of characters
public class TextParser : IParser
{
    private string value;

    /// <summary>Creates a parser that matches exactly <paramref name="_value"/>.</summary>
    /// <param name="_value">The literal text to look for at the cursor.</param>
    public TextParser(string _value)
    {
        value = _value;
    }

    /// <summary>
    /// Matches the literal at the current cursor position. On success the literal is appended
    /// to <paramref name="sb"/> and the cursor is left just past it; on failure the cursor is
    /// left where it started and nothing is appended.
    /// </summary>
    /// <param name="text">The input and its cursor.</param>
    /// <param name="sb">Receives the matched literal on success.</param>
    /// <returns><c>true</c> if the input at the cursor equals the literal; otherwise <c>false</c>.</returns>
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        // If fewer characters remain than the literal needs, it cannot match. Bail out
        // before GetNext, whose Substring call would throw on a read past the end.
        if (value.Length > text.Value.Length - text.Position)
        {
            return false;
        }

        var start = text.Position;
        var slice = text.GetNext(value.Length);
        if (slice == value)
        {
            sb.Append(slice);
            return true;
        }

        // Mismatch: rewind so the next alternative starts from the same place
        text.Position = start;
        return false;
    }
}
// Any single letter, as classified by Char.IsLetter
public class AnyLetterParser : IParser
{
    /// <summary>
    /// Matches one letter at the cursor. On success the letter is appended to
    /// <paramref name="sb"/> and the cursor advances by one; on failure (non-letter or end of
    /// input) the cursor is left where it started and nothing is appended.
    /// </summary>
    /// <param name="text">The input and its cursor.</param>
    /// <param name="sb">Receives the matched letter on success.</param>
    /// <returns><c>true</c> if a letter was consumed; otherwise <c>false</c>.</returns>
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        var start = text.Position;

        // Nothing left to read: fail instead of letting GetNext index past the end
        if (start >= text.Value.Length)
        {
            return false;
        }

        var character = text.GetNext();
        if (Char.IsLetter(character))
        {
            sb.Append(character);
            return true;
        }

        // Not a letter: give the character back so other parsers can try it
        text.Position = start;
        return false;
    }
}
// Any single digit, as classified by Char.IsDigit
public class AnyNumberParser : IParser
{
    /// <summary>
    /// Matches one digit at the cursor. On success the digit is appended to
    /// <paramref name="sb"/> and the cursor advances by one; on failure (non-digit or end of
    /// input) the cursor is left where it started and nothing is appended.
    /// </summary>
    /// <param name="text">The input and its cursor.</param>
    /// <param name="sb">Receives the matched digit on success.</param>
    /// <returns><c>true</c> if a digit was consumed; otherwise <c>false</c>.</returns>
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        var start = text.Position;

        // Nothing left to read: fail instead of letting GetNext index past the end
        if (start >= text.Value.Length)
        {
            return false;
        }

        var character = text.GetNext();
        if (Char.IsDigit(character))
        {
            sb.Append(character);
            return true;
        }

        // Not a digit: give the character back so other parsers can try it
        text.Position = start;
        return false;
    }
}
// Ordered choice: succeeds with the first alternative that matches
public class Any : IParser
{
    private IParser[] parsers;

    /// <summary>Creates a choice over the given alternatives, tried in the order supplied.</summary>
    /// <param name="_parsers">The alternative parsers.</param>
    public Any(params IParser[] _parsers)
    {
        parsers = _parsers;
    }

    /// <summary>
    /// Tries each alternative in turn from the same starting position and stops at the first
    /// one that succeeds. If none succeed, the cursor and <paramref name="sb"/> are left as
    /// they were on entry.
    /// </summary>
    /// <param name="text">The input and its cursor.</param>
    /// <param name="sb">Receives whatever the successful alternative appends.</param>
    /// <returns><c>true</c> if any alternative matched; otherwise <c>false</c>.</returns>
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        var start = text.Position;
        var sbStart = sb.Length;

        foreach (var parser in parsers)
        {
            if (parser.Parse(ref text, ref sb))
            {
                return true;
            }

            // Don't rely on the failed alternative having cleaned up after itself:
            // rewind the cursor and drop anything it appended before trying the next one.
            text.Position = start;
            sb.Length = sbStart;
        }

        return false;
    }
}
// Sequence: every parser must match, one after another
public class Sequence : IParser
{
    private IParser[] parsers;

    /// <summary>Creates a sequence of parsers that must all match, in the order supplied.</summary>
    /// <param name="_parsers">The parsers to run in order.</param>
    public Sequence(params IParser[] _parsers)
    {
        parsers = _parsers;
    }

    /// <summary>
    /// Runs each parser in order. If any of them fails, the cursor is rewound to where the
    /// sequence started and everything the sequence appended to <paramref name="sb"/> is removed.
    /// </summary>
    /// <param name="text">The input and its cursor.</param>
    /// <param name="sb">Receives the text matched by the whole sequence on success.</param>
    /// <returns><c>true</c> if every parser matched; otherwise <c>false</c>.</returns>
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        var start = text.Position;
        var sbStart = sb.Length;

        foreach (var parser in parsers)
        {
            if (!parser.Parse(ref text, ref sb))
            {
                // Something returned false
                text.Position = start; // Reset the cursor position

                // Reset the results: truncate back to the length on entry. (Remove's second
                // argument is a count, not an end index, so passing sb.Length there throws
                // whenever the builder already held text before this sequence ran.)
                sb.Length = sbStart;
                return false;
            }
        }

        return true;
    }
}
// This is basically just a string with a position indicator (a cursor)
public class WorkingText
{
    /// <summary>The full input text.</summary>
    public string Value { get; set; }

    /// <summary>Zero-based index of the next character to be read.</summary>
    public int Position { get; set; }

    /// <summary>Wraps <paramref name="text"/> with the cursor at position 0.</summary>
    /// <param name="text">The input text.</param>
    public WorkingText(string text)
    {
        Value = text;
    }

    /// <summary>
    /// Returns the next <paramref name="length"/> characters and advances the cursor past them.
    /// </summary>
    /// <param name="length">Number of characters to read.</param>
    /// <returns>The characters read.</returns>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// Thrown by <c>Substring</c> when fewer than <paramref name="length"/> characters remain;
    /// the cursor is not moved in that case.
    /// </exception>
    public string GetNext(int length)
    {
        var result = Value.Substring(Position, length);
        Position += length;
        return result;
    }

    /// <summary>Returns the next character and advances the cursor by one.</summary>
    /// <returns>The character read.</returns>
    /// <exception cref="System.IndexOutOfRangeException">
    /// Thrown by the string indexer when the cursor is outside the text; the cursor is not
    /// moved in that case.
    /// </exception>
    public char GetNext()
    {
        var character = Value[Position];
        Position++;
        return character;
    }

    /// <summary>Reports whether the cursor has reached (or been moved past) the end of the text.</summary>
    /// <returns><c>true</c> when no characters remain to be read.</returns>
    public bool AtEnd()
    {
        // Position has a public setter, so it can end up beyond the last index;
        // treat that as "at end" too rather than only the exact-equality case.
        return Position >= Value.Length;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment