Last active
December 27, 2015 08:39
-
-
Save dbones/7298086 to your computer and use it in GitHub Desktop.
FluentRegex - create a regex text via a fluent interface
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Resources; | |
using System.Runtime.CompilerServices; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
namespace FluentRegex | |
{ | |
/// <summary>
/// Demo host: shows how the fluent API is used to assemble the regex text
/// for Boxee/XBMC style TV episode file names.
/// </summary>
public class Class1
{
    /// <summary>
    /// Builds the file-name pattern from smaller patterns and compiles it to regex text.
    /// The compiled text is only held in a local; nothing is printed or returned.
    /// </summary>
    /// <param name="args">Command-line arguments; not read.</param>
    static void Main(string[] args)
    {
        // The fluent form is verbose, but it makes it easy to knock out a regular
        // expression, either to build expressions inside a program or as an aid
        // while writing one by hand. This code came out of a hacking session.
        //
        // Boxee/XBMC file names for a TV series look like this:
        //   epic.super.heros.s01e12.mp4
        //   epic.super.heros.s01e11.Episode.Title.mp4
        //   epic.super.heros.s1e07.Episode.Title.mp4

        // Building blocks: dot-separated words, season marker, episode marker.
        var dottedWords = Pattern.Define.With(Characters.Expression(@"(\w+(\.\w+)*?)"));

        var seasonDigits = Characters.Digit().Repeat.Between(1, 2).As("SeriesNum");
        var season = Pattern.Define.With(Characters.Set("sS")).FollowedBy(seasonDigits);

        var episodeDigits = Characters.Digit().Repeat.Between(1, 2).As("EpisodeNum");
        var episode = Pattern.Define.With(Characters.Set("eE")).FollowedBy(episodeDigits);

        // Optional ".Episode.Title" part that may follow the episode marker.
        var titleGroup = Group.TheFollowing(dottedWords).As("Title");
        var optionalTitle = Group.TheFollowing(Pattern.Define.With(".").FollowedBy(titleGroup)).Repeat.ZeroOrOne();

        // The complete file-name pattern.
        var fileName = Pattern.Define
            .With(Group.TheFollowing(dottedWords).As("Name"))
            .FollowedBy(".")
            .FollowedBy(season)
            .FollowedBy(episode)
            .FollowedBy(optionalTitle)
            .FollowedBy(".mp4");

        // Produces the regex text:
        // (?<Name>(\w+(\.\w+)*?))\.[sS](?<SeriesNum>\d{1,2})[eE](?<EpisodeNum>\d{1,2})(?:\.(?<Title>(\w+(\.\w+)*?)))?\.mp4
        var regexText = fileName.Compile();
    }
}
/// <summary>
/// Fluent builder for a sequence of expressions. Start with <see cref="Define"/>,
/// add parts with <see cref="With"/>, <see cref="FollowedBy"/> and <see cref="OrBy"/>,
/// then call <see cref="Compile"/> to obtain the regex text.
/// </summary>
/// <remarks>
/// The most recently added expression is held back in a pending slot so that
/// <see cref="OrBy"/> can still turn it into an alternation. The Repeat and Is
/// members assigned in the constructor are not consulted by <see cref="GetRegex"/>.
/// </remarks>
public class Pattern : Expression<IMainPattern>, IStartPattern, IMainPattern
{
    // Expressions already committed to the sequence, in order.
    private readonly Group _group = new Group();

    // The most recently added expression, not yet committed to _group.
    private Expression _lastExpression;

    private Pattern()
    {
        Repeat = new Repeat<IMainPattern>(this);
        Is = new Absent<IMainPattern>(this);
    }

    /// <summary>Creates a new, empty pattern each time it is read.</summary>
    public static IStartPattern Define
    {
        get
        {
            return new Pattern();
        }
    }

    /// <summary>
    /// Commits the pending expression (if any) to the sequence and clears the slot.
    /// A pending alternation that does not already render its own group is wrapped
    /// in a non-capturing group, so that "|" cannot capture the neighbouring
    /// expressions of the sequence (e.g. a(?:b|c) rather than ab|c).
    /// </summary>
    private void FlushLastExpression()
    {
        if (_lastExpression == null)
        {
            return;
        }

        var alternation = _lastExpression as Or;
        bool rendersBare = alternation != null
            && string.IsNullOrEmpty(alternation.Repeat.Value)
            && !alternation.Is.Value;

        if (rendersBare)
        {
            var wrapper = new Group();
            wrapper.Add(alternation);
            wrapper.Force();
            _group.Add(wrapper);
        }
        else
        {
            _group.Add(_lastExpression);
        }

        _lastExpression = null;
    }

    /// <summary>Commits the pending expression and makes <paramref name="exp"/> the new pending one.</summary>
    private void AddExpression(Expression exp)
    {
        FlushLastExpression();
        _lastExpression = exp;
    }

    /// <summary>Adds the first expression of the pattern.</summary>
    /// <param name="exp">Expression to add.</param>
    /// <returns>This pattern, for chaining.</returns>
    public IMainPattern With(Expression exp)
    {
        AddExpression(exp);
        return this;
    }

    /// <summary>Appends an expression to the sequence.</summary>
    /// <param name="exp">Expression to append.</param>
    /// <returns>This pattern, for chaining.</returns>
    public IMainPattern FollowedBy(Expression exp)
    {
        AddExpression(exp);
        return this;
    }

    /// <summary>
    /// Makes <paramref name="exp"/> an alternative to the most recently added expression.
    /// </summary>
    /// <param name="exp">The alternative to add.</param>
    /// <returns>This pattern, for chaining.</returns>
    public IMainPattern OrBy(Expression exp)
    {
        // Ensure we do not affect an inner OR: hold it inside its own group.
        if (exp is Or)
        {
            var inner = new Group();
            inner.Add(exp);
            exp = inner;
        }

        // Append to an existing pending OR when there is one.
        var alternation = _lastExpression as Or;
        if (alternation != null)
        {
            alternation.Add(exp);
            return this;
        }

        // Otherwise the pending expression becomes the FIRST alternative.
        // Previously it was committed to the sequence and the new OR held only
        // `exp`, so With(a).OrBy(b) rendered "ab" instead of "a|b".
        alternation = new Or();
        if (_lastExpression != null)
        {
            alternation.Add(_lastExpression);
        }
        alternation.Add(exp);
        _lastExpression = alternation;
        return this;
    }

    /// <summary>
    /// Commits any pending expression and returns the regex text of the whole sequence.
    /// </summary>
    public override string GetRegex()
    {
        FlushLastExpression();
        return _group.GetRegex();
    }

    /// <summary>Returns the regex text; same result as <see cref="GetRegex"/>.</summary>
    public string Compile()
    {
        return GetRegex();
    }
}
/// <summary>
/// Anything that can be rendered to regex text.
/// </summary>
public interface IPattern
{
    /// <summary>Produces the regex text for this pattern.</summary>
    /// <returns>The regex as a string.</returns>
    string Compile();
}
/// <summary>
/// A pattern that already has its first expression and can be extended
/// in sequence or with alternatives.
/// </summary>
public interface IMainPattern : IPattern
{
    /// <summary>Appends <paramref name="exp"/> to the pattern.</summary>
    /// <returns>The pattern, for chaining.</returns>
    IMainPattern FollowedBy(Expression exp);

    /// <summary>Adds <paramref name="exp"/> as an alternative.</summary>
    /// <returns>The pattern, for chaining.</returns>
    IMainPattern OrBy(Expression exp);

    // Not currently part of the contract (kept from the original, disabled):
    //IMainPattern As(string name);
    //Repeat<IMainPattern> Repeat { get; }
}
/// <summary>
/// A pattern contract that redeclares <c>Compile</c> on top of <see cref="IPattern"/>.
/// </summary>
public interface IHandlePattern : IPattern
{
    /// <summary>Produces the regex text for this pattern.</summary>
    /// <remarks>
    /// This member hides <see cref="IPattern.Compile"/>; the <c>new</c> modifier states
    /// that explicitly and removes the CS0108 "hides inherited member" warning.
    /// </remarks>
    /// <returns>The regex as a string.</returns>
    new string Compile();
}
/// <summary>
/// A pattern that has no expressions yet; the only way forward is to supply the first one.
/// </summary>
public interface IStartPattern : IPattern
{
    /// <summary>Sets the first expression of the pattern.</summary>
    /// <param name="exp">The expression to start with.</param>
    /// <returns>The pattern in its extendable form.</returns>
    IMainPattern With(Expression exp);
}
/// <summary>
/// Holds the two regex fragments of an expression: the normal form and the
/// form used when the expression is marked as absent.
/// </summary>
public class CharaterValue
{
    /// <summary>Regex fragment used when the expression is not marked absent.</summary>
    public string Value { get; set; }

    /// <summary>Regex fragment used when the expression is marked absent.</summary>
    public string AbsentValue { get; set; }
}
/// <summary>
/// Base type for every node that can render itself to regex text.
/// </summary>
public abstract class Expression
{
    // Characters that get a leading backslash when literal text is embedded in a
    // regex. The backslash itself is included: without it a literal "\" in the
    // input would reach the regex unescaped and start an escape sequence.
    private static readonly Regex EscapeCharacters = new Regex(@"[\\\^$.?|*+()[{]");

    /// <summary>
    /// Prefixes every regex metacharacter in <paramref name="str"/> with a backslash.
    /// </summary>
    /// <param name="str">Literal text to embed in a regex.</param>
    /// <returns>The text with metacharacters escaped.</returns>
    protected static string ApplyEscapeCharacters(string str)
    {
        return EscapeCharacters.Replace(str, match => "\\" + match.Value);
    }

    /// <summary>The normal/absent regex fragments of this expression, when it has them.</summary>
    protected internal CharaterValue Value { get; protected set; }

    /// <summary>Renders this expression as regex text.</summary>
    public abstract string GetRegex();
}
/// <summary>
/// An expression that carries a quantifier (<see cref="Repeat"/>) and an absent flag
/// (<see cref="Is"/>), both of which hand back <typeparamref name="T"/> for chaining.
/// </summary>
/// <typeparam name="T">The type returned by the fluent quantifier/absent calls.</typeparam>
public abstract class Expression<T> : Expression
{
    /// <summary>Quantifier settings for this expression.</summary>
    public Repeat<T> Repeat { get; protected set; }

    /// <summary>Absent/not-absent switch for this expression.</summary>
    public Absent<T> Is { get; protected set; }

    /// <summary>
    /// Renders the absent or normal fragment, chosen by <see cref="Is"/>,
    /// followed by the quantifier text.
    /// </summary>
    public override string GetRegex()
    {
        string fragment;
        if (Is.Value)
        {
            fragment = Value.AbsentValue;
        }
        else
        {
            fragment = Value.Value;
        }

        return string.Concat(fragment, Repeat.Value);
    }
}
/// <summary>
/// A character-level expression (class, set, literal or raw regex fragment).
/// This part holds the constructor; instances are created through the static factories.
/// </summary>
public partial class Characters : Expression<Characters>
{
    /// <summary>
    /// Wraps the given fragments and wires up the quantifier and absent helpers.
    /// </summary>
    /// <param name="value">Normal and absent regex fragments for this expression.</param>
    internal Characters(CharaterValue value)
    {
        Is = new Absent<Characters>(this);
        Repeat = new Repeat<Characters>(this);
        Value = value;
    }
}
/// <summary>
/// Static factories for <see cref="Characters"/>. Each factory supplies the normal
/// regex fragment and the fragment used when the expression is marked absent.
/// </summary>
public partial class Characters : Expression<Characters>
{
    /// <summary>Word character: <c>\w</c>, absent form <c>\W</c>.</summary>
    public static Characters Alphanumeric()
    {
        var cv = new CharaterValue { Value = @"\w", AbsentValue = @"\W" };
        return new Characters(cv);
    }

    /// <summary>Digit: <c>\d</c>, absent form <c>\D</c>.</summary>
    public static Characters Digit()
    {
        var cv = new CharaterValue { Value = @"\d", AbsentValue = @"\D" };
        return new Characters(cv);
    }

    /// <summary>Whitespace: <c>\s</c>, absent form <c>\S</c>.</summary>
    public static Characters Whitespace()
    {
        var cv = new CharaterValue { Value = @"\s", AbsentValue = @"\S" };
        return new Characters(cv);
    }

    /// <summary>Named class: <c>\p{name}</c>, absent form <c>\P{name}</c>.</summary>
    /// <param name="class">Name placed between the braces, inserted verbatim.</param>
    public static Characters NamedClass(string @class)
    {
        string value = string.Format("\\p{{{0}}}", @class);
        string absentValue = string.Format("\\P{{{0}}}", @class);
        var cv = new CharaterValue { Value = value, AbsentValue = absentValue };
        return new Characters(cv);
    }

    /// <summary>Character set: <c>[set]</c>, absent form <c>[^set]</c>.</summary>
    /// <param name="set">Set contents, inserted verbatim (not escaped).</param>
    public static Characters Set(string set)
    {
        string value = string.Format("[{0}]", set);
        string absentValue = string.Format("[^{0}]", set);
        var cv = new CharaterValue { Value = value, AbsentValue = absentValue };
        return new Characters(cv);
    }

    /// <summary>
    /// Character set with subtraction: <c>[set-[excluding]]</c>,
    /// absent form <c>[^set-[excluding]]</c>.
    /// </summary>
    /// <param name="set">Base set contents, inserted verbatim.</param>
    /// <param name="excluding">Contents of the subtracted set, inserted verbatim.</param>
    public static Characters Set(string set, string excluding)
    {
        string value = string.Format("[{0}-[{1}]]", set, excluding);
        string absentValue = string.Format("[^{0}-[{1}]]", set, excluding);
        var cv = new CharaterValue { Value = value, AbsentValue = absentValue };
        return new Characters(cv);
    }

    /// <summary>Uses a raw regular expression fragment as-is.</summary>
    /// <param name="regularExpression">Regex text, inserted verbatim.</param>
    public static Characters Expression(string regularExpression)
    {
        // NOTE(review): the absent form is "^(...)", which in regex syntax is a
        // start anchor followed by the group rather than a negation - confirm intent.
        string absentValue = string.Format("^({0})", regularExpression);
        var cv = new CharaterValue { Value = regularExpression, AbsentValue = absentValue };
        return new Characters(cv);
    }

    /// <summary>
    /// A single literal character, escaped for regex use; the absent form is the
    /// negated set <c>[^c]</c>.
    /// </summary>
    /// <param name="char">The character to match literally.</param>
    public static Characters Literal(char @char)
    {
        string literal = ApplyEscapeCharacters(@char.ToString());
        string value = string.Format("{0}", literal);
        // "Any character except this one" needs a negated set. The previous "^c"
        // was a start anchor followed by the character itself.
        string absentValue = string.Format("[^{0}]", literal);
        var cv = new CharaterValue { Value = value, AbsentValue = absentValue };
        return new Characters(cv);
    }
}
/// <summary>
/// A literal piece of text. Regex metacharacters in the text are escaped so the
/// text is matched as written.
/// </summary>
public class StringPattern : Expression<StringPattern>
{
    // True when the source text is longer than one character, in which case a
    // quantifier must be applied to a group rather than to the raw text.
    private readonly bool _isMultiCharacter;

    /// <summary>Creates a literal pattern from <paramref name="value"/>.</summary>
    /// <param name="value">Text to match literally.</param>
    internal StringPattern(string value)
    {
        string literal = ApplyEscapeCharacters(value);
        _isMultiCharacter = value.Length > 1;
        Value = new CharaterValue()
        {
            Value = literal,
            AbsentValue = string.Format("^({0})", literal)
        };
        Repeat = new Repeat<StringPattern>(this);
        Is = new Absent<StringPattern>(this);
    }

    /// <summary>
    /// Renders the literal. When a quantifier is set on text longer than one
    /// character, the text is wrapped in a non-capturing group so the quantifier
    /// applies to the whole text: "abc" repeated renders (?:abc)+ instead of abc+,
    /// where only the final "c" would repeat.
    /// </summary>
    public override string GetRegex()
    {
        string quantifier = Repeat.Value;

        // The absent form is already parenthesised, and single characters or
        // unquantified text need no extra grouping.
        if (Is.Value || !_isMultiCharacter || string.IsNullOrEmpty(quantifier))
        {
            return base.GetRegex();
        }

        return string.Format("(?:{0}){1}", Value.Value, quantifier);
    }
}
/// <summary>
/// Convenience extension methods: string and <see cref="IPattern"/> overloads of the
/// fluent calls, naming of expressions, and building alternations.
/// </summary>
public static class Extensions
{
    /// <summary>Treats <paramref name="str"/> as literal text and exposes its quantifier.</summary>
    public static Repeat<StringPattern> Repeat(this string str)
    {
        return new StringPattern(str).Repeat;
    }

    /// <summary>Appends literal text to the pattern.</summary>
    public static IMainPattern FollowedBy(this IMainPattern pattern, string exp)
    {
        return pattern.FollowedBy(new StringPattern(exp));
    }

    /// <summary>Adds literal text as an alternative.</summary>
    public static IMainPattern OrBy(this IMainPattern pattern, string exp)
    {
        return pattern.OrBy(new StringPattern(exp));
    }

    /// <summary>Starts the pattern with literal text.</summary>
    public static IMainPattern With(this IStartPattern pattern, string exp)
    {
        return pattern.With(new StringPattern(exp));
    }

    /// <summary>Appends another pattern; <paramref name="exp"/> is cast to <see cref="Expression"/>.</summary>
    public static IMainPattern FollowedBy(this IMainPattern pattern, IPattern exp)
    {
        Expression asExpression = (Expression)exp;
        return pattern.FollowedBy(asExpression);
    }

    /// <summary>Adds another pattern as an alternative; <paramref name="exp"/> is cast to <see cref="Expression"/>.</summary>
    public static IMainPattern OrBy(this IMainPattern pattern, IPattern exp)
    {
        Expression asExpression = (Expression)exp;
        return pattern.OrBy(asExpression);
    }

    /// <summary>Starts with another pattern; <paramref name="exp"/> is cast to <see cref="Expression"/>.</summary>
    public static IMainPattern With(this IStartPattern pattern, IPattern exp)
    {
        Expression asExpression = (Expression)exp;
        return pattern.With(asExpression);
    }

    /// <summary>Wraps <paramref name="expression"/> in a new group with the given name.</summary>
    public static Group As(this Expression expression, string name)
    {
        var named = new Group(name);
        named.Add(expression);
        return named;
    }

    /// <summary>Alternation of an expression and literal text.</summary>
    public static Or Or(this Expression expression1, string expression2)
    {
        var right = new StringPattern(expression2);
        return expression1.Or(right);
    }

    /// <summary>Alternation of two pieces of literal text.</summary>
    public static Or Or(this string expression1, string expression2)
    {
        var left = new StringPattern(expression1);
        var right = new StringPattern(expression2);
        return left.Or(right);
    }

    /// <summary>Alternation of literal text and an expression.</summary>
    public static Or Or(this string expression1, Expression expression2)
    {
        var left = new StringPattern(expression1);
        return left.Or(expression2);
    }

    /// <summary>
    /// Alternation of two expressions. When <paramref name="expression1"/> is already
    /// an alternation, <paramref name="expression2"/> is appended to it and that same
    /// instance is returned; otherwise a new alternation of both is created.
    /// Operands that are groups are forced to render their parentheses.
    /// </summary>
    public static Or Or(this Expression expression1, Expression expression2)
    {
        ForceIfGroup(expression2);

        // Append to an existing OR when the left side already is one.
        var existing = expression1 as Or;
        if (existing != null)
        {
            existing.Add(expression2);
            return existing;
        }

        ForceIfGroup(expression1);
        var created = new Or();
        created.Add(expression1);
        created.Add(expression2);
        return created;
    }

    // Calls Force() on the candidate when it is a Group; does nothing otherwise.
    private static void ForceIfGroup(Expression candidate)
    {
        var asGroup = candidate as Group;
        if (asGroup != null)
        {
            asGroup.Force();
        }
    }
}
/// <summary>
/// An ordered list of expressions rendered one after another. Parentheses are
/// only emitted when the group is named, quantified, marked absent, or forced.
/// </summary>
public partial class Group : Expression<Group>
{
    private string _groupName;
    private readonly List<Expression> _expressions = new List<Expression>();
    private bool _forceGroup = false;

    /// <summary>Creates an empty group with the given name.</summary>
    internal Group(string groupName)
        : this()
    {
        _groupName = groupName;
    }

    /// <summary>Creates an empty, unnamed group.</summary>
    internal Group()
    {
        Repeat = new Repeat<Group>(this);
        Is = new Absent<Group>(this);
    }

    /// <summary>Appends one expression to the group.</summary>
    public void Add(Expression expression)
    {
        _expressions.Add(expression);
    }

    /// <summary>Appends several expressions to the group, in order.</summary>
    public void AddRange(IEnumerable<Expression> expressions)
    {
        _expressions.AddRange(expressions);
    }

    /// <summary>Sets (or replaces) the group name and returns this group.</summary>
    public Group As(string name)
    {
        _groupName = name;
        return this;
    }

    /// <summary>Makes the group always render its parentheses and returns this group.</summary>
    public Group Force()
    {
        _forceGroup = true;
        return this;
    }

    /// <summary>
    /// Renders the members in order. When grouping is required the output is
    /// an optional "^" (absent), then "(?&lt;name&gt;" or "(?:", the members, ")"
    /// and the quantifier text.
    /// </summary>
    public override string GetRegex()
    {
        string quantifier = Repeat.Value;
        bool named = !string.IsNullOrEmpty(_groupName);
        bool negated = Is.Value;
        bool wrap = named || !string.IsNullOrEmpty(quantifier) || negated || _forceGroup;

        string members = string.Concat(_expressions.Select(member => member.GetRegex()).ToArray());
        if (!wrap)
        {
            // Nothing requires parentheses: emit the members back to back.
            return members;
        }

        string prefix = negated ? "^" : "";
        string opener = named ? string.Format("(?<{0}>", _groupName) : "(?:";
        return prefix + opener + members + ")" + quantifier;
    }
}
/// <summary>
/// Static factories for <see cref="Group"/>.
/// </summary>
public partial class Group
{
    /// <summary>Creates an unnamed group holding the given expressions in order.</summary>
    /// <param name="expressions">Members of the new group.</param>
    public static Group TheFollowing(params Expression[] expressions)
    {
        var created = new Group();
        created.AddRange(expressions);
        return created;
    }

    /// <summary>
    /// Creates an unnamed group holding a single pattern. The pattern is cast to
    /// <see cref="Expression"/>.
    /// </summary>
    /// <param name="pattern">The pattern to place in the group.</param>
    public static Group TheFollowing(IMainPattern pattern)
    {
        Expression asExpression = (Expression)pattern;
        return TheFollowing(asExpression);
    }
}
/// <summary>
/// An alternation: its members are rendered separated by "|".
/// </summary>
public class Or : Expression<Or>
{
    private readonly List<Expression> _expressions = new List<Expression>();

    /// <summary>Creates an empty alternation.</summary>
    internal Or()
    {
        Is = new Absent<Or>(this);
        Repeat = new Repeat<Or>(this);
    }

    /// <summary>Appends one alternative.</summary>
    public void Add(Expression expression)
    {
        _expressions.Add(expression);
    }

    /// <summary>Appends several alternatives, in order.</summary>
    public void AddRange(IEnumerable<Expression> expressions)
    {
        _expressions.AddRange(expressions);
    }

    /// <summary>
    /// Renders the alternatives joined with "|". When a quantifier is set or the
    /// alternation is marked absent, the alternatives are wrapped in a
    /// non-capturing group, preceded by "^" when absent and followed by the quantifier.
    /// </summary>
    public override string GetRegex()
    {
        var repeat = Repeat.Value;
        bool hasRepeat = !string.IsNullOrEmpty(repeat);
        bool isAbsent = Is.Value;
        bool requiresGroup = hasRepeat || isAbsent;

        var result = new StringBuilder();
        if (requiresGroup)
        {
            if (isAbsent)
            {
                result.Append("^");
            }

            // Always open the group here. Previously the absent branch skipped
            // "(?:" while the closing ")" below was still appended, which
            // produced unbalanced parentheses.
            result.Append("(?:");
        }

        var pipeSeparatedList = string.Join("|", _expressions.Select(x => x.GetRegex()).ToArray());
        result.Append(pipeSeparatedList);

        if (requiresGroup)
        {
            result.Append(")");
            if (hasRepeat)
            {
                result.Append(repeat);
            }
        }

        return result.ToString();
    }
}
/// <summary>
/// Fluent on/off switch for the "absent" flag of an expression. Both setters hand
/// back the instance supplied at construction so calls can be chained.
/// </summary>
/// <typeparam name="T">Type of the instance returned for chaining.</typeparam>
public class Absent<T>
{
    private readonly T _instance;

    /// <summary>Creates the switch in the "not absent" state.</summary>
    /// <param name="instance">Object returned by the fluent calls.</param>
    public Absent(T instance)
    {
        Value = false;
        _instance = instance;
    }

    /// <summary>True when the expression is marked absent.</summary>
    public bool Value { get; private set; }

    /// <summary>Marks the expression as absent.</summary>
    public T IsAbsent()
    {
        return Mark(true);
    }

    /// <summary>Clears the absent mark.</summary>
    public T NotAbsent()
    {
        return Mark(false);
    }

    // Stores the flag and returns the chained instance.
    private T Mark(bool absent)
    {
        Value = absent;
        return _instance;
    }
}
/// <summary>
/// Fluent quantifier settings for an expression. Each quantifier method stores
/// the quantifier text and hands back the instance supplied at construction.
/// </summary>
/// <typeparam name="T">Type of the instance returned for chaining.</typeparam>
public class Repeat<T>
{
    private readonly T _instance;
    private bool _fewTimesAsPossible = false;
    private bool _isRangeOrExactValue = false;
    private string _current = "";

    /// <summary>Creates quantifier settings with no quantifier selected.</summary>
    /// <param name="instance">Object returned by the quantifier methods.</param>
    public Repeat(T instance)
    {
        _instance = instance;
    }

    /// <summary>
    /// The quantifier text to append to the expression, including the lazy "?"
    /// suffix when it applies.
    /// </summary>
    internal string Value
    {
        get
        {
            // The lazy suffix is only meaningful after a real quantifier. With no
            // quantifier (JustOnce) a lone "?" would itself be read as
            // "zero or one" and silently make the expression optional.
            bool lazy = _fewTimesAsPossible
                && !_isRangeOrExactValue
                && _current.Length > 0;
            return lazy ? _current + "?" : _current;
        }
    }

    /// <summary>
    /// Switches lazy matching. Note that every read of this property flips the
    /// flag, so reading it twice returns to the original state.
    /// </summary>
    public Repeat<T> FewTimesAsPossible
    {
        get
        {
            _fewTimesAsPossible = !_fewTimesAsPossible;
            return this;
        }
    }

    /// <summary>No quantifier.</summary>
    public T JustOnce()
    {
        return Use("", false);
    }

    /// <summary>Quantifier <c>*</c>.</summary>
    public T AnyNumber()
    {
        return Use("*", false);
    }

    /// <summary>Quantifier <c>+</c>.</summary>
    public T OneOrMore()
    {
        return Use("+", false);
    }

    /// <summary>Quantifier <c>?</c>.</summary>
    public T ZeroOrOne()
    {
        return Use("?", false);
    }

    /// <summary>Quantifier <c>{times}</c>; the lazy suffix is never added to it.</summary>
    /// <param name="times">Repetition count.</param>
    public T Exactly(int times)
    {
        return Use(string.Format("{{{0}}}", times), true);
    }

    /// <summary>Quantifier <c>{minTimes,}</c>.</summary>
    /// <param name="minTimes">Lower bound.</param>
    public T AtLeast(int minTimes)
    {
        return Use(string.Format("{{{0},}}", minTimes), false);
    }

    /// <summary>Quantifier <c>{minTimes,maxTimes}</c>.</summary>
    /// <param name="minTimes">Lower bound.</param>
    /// <param name="maxTimes">Upper bound.</param>
    public T Between(int minTimes, int maxTimes)
    {
        return Use(string.Format("{{{0},{1}}}", minTimes, maxTimes), false);
    }

    // Stores the quantifier text and whether it is an exact count, then returns
    // the chained instance.
    private T Use(string quantifier, bool exact)
    {
        _isRangeOrExactValue = exact;
        _current = quantifier;
        return _instance;
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment