Last active
June 1, 2024 18:13
-
-
Save deanebarker/d2af243d053c3ac6b688a1b82ff378c2 to your computer and use it in GitHub Desktop.
A very simple parser combinator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void Main()
{
    // The parser: hits on "Deane" or "Annie" followed by a number, then two letters, then another number
    var name = new Any(
        new TextParser("Deane"),
        new TextParser("Annie")
    );
    var parser = new Sequence(
        name,
        new AnyNumberParser(),
        new AnyLetterParser(),
        new AnyLetterParser(),
        new AnyNumberParser()
    );

    // The text to run the parser against, and the buffer that collects whatever it matches
    var text = new WorkingText("Annie1aa2");
    var results = new StringBuilder();

    parser.Parse(ref text, ref results);

    // Dump() is LINQPad's output helper
    results.Dump();

    // results: Annie1aa2
    // text.Position: 9
}
// Common contract for every parser in this file.
// Parse tries to match at text.Position. The implementations here return true after
// appending the matched characters to sb and leaving the cursor past them, and false
// when nothing matched.
public interface IParser
{
    bool Parse(ref WorkingText text, ref StringBuilder sb);
}
// Matches one exact string of characters (case-sensitive, ordinal comparison)
public class TextParser : IParser
{
    private readonly string value;

    // _value: the literal text this parser must find at the cursor
    public TextParser(string _value)
    {
        if (_value == null)
        {
            throw new ArgumentNullException(nameof(_value));
        }
        value = _value;
    }

    // Returns true and appends the literal to sb when the input at text.Position starts with it.
    // Returns false and leaves text.Position unchanged otherwise.
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        // Not enough input left to hold the literal: fail here rather than
        // asking for a slice that runs past the end of the string
        if (text.Value.Length - text.Position < value.Length)
        {
            return false;
        }

        var start = text.Position;
        var slice = text.GetNext(value.Length);
        if (slice == value)
        {
            sb.Append(slice);
            return true;
        }

        // No match: put the cursor back where we found it
        text.Position = start;
        return false;
    }
}
// Any single letter, as classified by Char.IsLetter
public class AnyLetterParser : IParser
{
    // Returns true and appends the character to sb when the character at text.Position is a letter.
    // Returns false and leaves text.Position unchanged otherwise, including at end of input.
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        // Nothing left to read: fail instead of indexing past the end of the string
        if (text.Position >= text.Value.Length)
        {
            return false;
        }

        var start = text.Position;
        var character = text.GetNext();
        if (Char.IsLetter(character))
        {
            sb.Append(character);
            return true;
        }

        // GetNext advanced the cursor; undo that so a failed match consumes nothing
        text.Position = start;
        return false;
    }
}
// Any single digit, as classified by Char.IsDigit
public class AnyNumberParser : IParser
{
    // Returns true and appends the character to sb when the character at text.Position is a digit.
    // Returns false and leaves text.Position unchanged otherwise, including at end of input.
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        // Nothing left to read: fail instead of indexing past the end of the string
        if (text.Position >= text.Value.Length)
        {
            return false;
        }

        var start = text.Position;
        var character = text.GetNext();
        if (Char.IsDigit(character))
        {
            sb.Append(character);
            return true;
        }

        // GetNext advanced the cursor; undo that so a failed match consumes nothing
        text.Position = start;
        return false;
    }
}
// Ordered choice: tries each parser in turn and succeeds with the first one that matches
public class Any : IParser
{
    private readonly IParser[] parsers;

    // _parsers: the alternatives, tried in the order given
    public Any(params IParser[] _parsers)
    {
        // An explicit null array is treated as "no alternatives" so Parse simply returns false
        parsers = _parsers ?? new IParser[0];
    }

    // Returns true as soon as one alternative matches.
    // Returns false, with text.Position and sb as they were on entry, when none do.
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        var start = text.Position;
        var sbStart = sb.Length;

        foreach (var parser in parsers)
        {
            if (parser.Parse(ref text, ref sb))
            {
                return true;
            }

            // Don't trust the failed alternative to have cleaned up after itself:
            // rewind the cursor and drop anything it appended, so the next
            // alternative starts from the same place
            text.Position = start;
            if (sb.Length > sbStart)
            {
                sb.Length = sbStart;
            }
        }

        return false;
    }
}
// Runs a list of parsers one after another; all of them must match for the sequence to match
public class Sequence : IParser
{
    private readonly IParser[] parsers;

    // _parsers: the parsers to apply, in order
    public Sequence(params IParser[] _parsers)
    {
        // An explicit null array is treated as an empty sequence, which matches trivially
        parsers = _parsers ?? new IParser[0];
    }

    // Returns true when every parser matched in order.
    // On the first failure, restores text.Position and removes everything this
    // sequence appended to sb, then returns false.
    public bool Parse(ref WorkingText text, ref StringBuilder sb)
    {
        var start = text.Position;
        var sbStart = sb.Length;

        foreach (var parser in parsers)
        {
            if (!parser.Parse(ref text, ref sb))
            {
                // Something returned false
                text.Position = start; // Reset the cursor position

                // Reset the results. Remove takes (startIndex, count), so the count is
                // only what was appended since sbStart, not the builder's total length.
                if (sb.Length > sbStart)
                {
                    sb.Remove(sbStart, sb.Length - sbStart);
                }
                return false;
            }
        }

        return true;
    }
}
// This is basically just a string with a position indicator
public class WorkingText
{
    // The full text being parsed
    public string Value { get; set; }

    // Zero-based index of the next character to be read
    public int Position { get; set; }

    public WorkingText(string text)
    {
        Value = text;
    }

    // Returns the next `length` characters and moves Position past them.
    // When fewer than `length` characters remain, returns only what is left
    // (possibly an empty string) rather than throwing.
    // Throws ArgumentOutOfRangeException when length is negative.
    public string GetNext(int length)
    {
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length));
        }

        // Clamp to the characters actually available; Position may already be at or past the end
        var remaining = Math.Max(0, Value.Length - Position);
        var count = Math.Min(length, remaining);
        if (count == 0)
        {
            return string.Empty;
        }

        var result = Value.Substring(Position, count);
        Position += count;
        return result;
    }

    // Returns the next character and moves Position past it.
    // At end of input returns '\0' and leaves Position unchanged.
    public char GetNext()
    {
        if (AtEnd())
        {
            return '\0';
        }

        var character = Value[Position];
        Position++;
        return character;
    }

    // True when there is nothing left to read
    public bool AtEnd()
    {
        // >= rather than == so a Position set beyond the end still counts as "at end"
        return Position >= Value.Length;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment