Skip to content

Instantly share code, notes, and snippets.

@AmadorMunozBerzosa
Last active June 19, 2021 16:36
Show Gist options
  • Save AmadorMunozBerzosa/3288771aaa199cc73acc7781bff4d49e to your computer and use it in GitHub Desktop.
Save AmadorMunozBerzosa/3288771aaa199cc73acc7781bff4d49e to your computer and use it in GitHub Desktop.
DSL wrapper for working with regular expressions in a readable manner in F#. Gist messes with the file ordering, but the correct file order is: Types > Evaluation > Operators > ActivePatterns > Examples
module Krow.Regex.ActivePatterns
open System.Text
let (|Regex|_|) (pattern:IRegex) input =
if input = null then
None
else
try
let match' = RegularExpressions.Regex.Match(input, pattern |> Regex.evaluate)
if match'.Success then
Some( List.tail [ for groups in match'.Groups -> groups.Value ])
else
None
with _ -> None
let (|Regexs|) (pattern:IRegex) input =
if input = null then [] else
try
let matches = RegularExpressions.Regex.Matches(input, pattern |> Regex.evaluate)
[ for match' in matches do (List.tail [ for group in match'.Groups -> group.Value ]) ]
with e -> []
[<AutoOpen>]
module Krow.Regex.Evaluation
open Krow.Regex.Types
open System.Text
module Regex =
let escape s = (RegularExpressions.Regex.Escape s).Replace("]", "\]")
let unescape (s:string) = (RegularExpressions.Regex.Unescape (s.Replace("\]", "]")))
[<AutoOpen>]
module private Helpers =
let groupable (regex:IRegex) =
match regex with
| :? Regex.Sequence | :? Regex.OneOf -> Regex.NonCapturing regex :> IRegex
| _ -> regex
let listGroupable (regex:IRegex) =
match regex with
| :? Regex.OneOf -> Regex.NonCapturing regex :> IRegex
| _ -> regex
let rec charsetContent charSet =
match charSet with
| CharSet.OneOf chars ->
let string = (new string(chars |> List.toArray)).Replace("/",@"\/")
$@"{string}"
| CharSet.Range (first,last) ->
$@"{first}-{last}"
| CharSet.Multiple charSets ->
charSets |> List.map charsetContent |> List.reduce (+)
let referenceString reference =
match reference with
| Group.Positional number -> number |> string
| Group.Named string -> string
|> escape
let rec evaluate (regex:IRegex) =
match regex with
| :? Regex.Literal as literal ->
let (Regex.Literal literal) = literal
escape literal
| :? Regex.Raw as literal ->
let (Regex.Raw literal) = literal
literal
| :? Regex.OneOf as oneOf ->
let (Regex.OneOf regexs) = oneOf
regexs
|> List.map evaluate |> String.concat "|"
| :? Regex.Sequence as sequence ->
let (Regex.Sequence regexs) = sequence
regexs |> List.map (listGroupable >> evaluate) |> String.concat ""
| :? Regex.NonCapturing as grouping ->
let (Regex.NonCapturing regex) = grouping
$@"(?:{evaluate regex})"
| :? Mode.WithModes as withModes ->
let (Mode.WithModes (modes, regex)) = withModes
let modeChar = function
| Mode.CaseInsensitive -> "i"
| Mode.Multiline -> "m"
| Mode.ExplicitCapture -> "n"
| Mode.IgnoreUnescapedWhiteSpace -> "x"
let modeList = modes |> List.map modeChar |> String.concat ""
$@"(?{modeList}:{regex})"
| :? Group.Reference as reference ->
let string = reference |> referenceString
match reference with
| Group.Positional _ -> $@"\{string}"
| Group.Named _ -> $@"\k<{string}>"
| :? Look.Look as look ->
match look with
| Look.Ahead regex -> $@"(?={regex |> evaluate})"
| Look.Behind regex -> $@"(?<={regex |> evaluate})"
| :? Look.Negated as look ->
let (Look.Negated look) = look
match look with
| Look.Ahead regex -> $@"(?!{regex |> evaluate})"
| Look.Behind regex -> $@"(?<!{regex |> evaluate})"
| :? Group.Group as group ->
let (Group.Group(group,regex)) = group
let regex = regex |> evaluate
match group with
// Capturing
| Group.Capturing -> $@"({regex})"
| Group.CapturingWithName name -> $@"(?<{name}>{regex})"
// Non capturing
| Group.NonBacktrackingGrouping -> $@"(?>{regex})"
// Balancing
| Group.UnCapturing reference ->
$@"(?<-{reference |> referenceString}>{regex})"
| Group.Balancing (newName, reference) ->
$@"(?<{newName |> escape}-{reference |> referenceString}>{regex})"
| :? SpecialChar.SpecialChar as special ->
match special with
| SpecialChar.WildCard -> @"."
| SpecialChar.Bell -> @"\a"
| SpecialChar.Backspace -> @"\b"
| SpecialChar.Tab -> @"\t"
| SpecialChar.VerticalTab -> @"\v"
| SpecialChar.CarriageReturn -> @"\r"
| SpecialChar.NewLine -> @"\n"
| SpecialChar.Escaped -> @"\e"
| SpecialChar.Octal oct -> $@"\{oct}"
| SpecialChar.Hexadecimal hex -> $@"\x{hex}"
| SpecialChar.ASCII ascii -> $@"\u{ascii}"
| :? Anchor.Anchor as anchor ->
match anchor with
| Anchor.Start -> @"\A"
| Anchor.StartOfLine -> @"^"
| Anchor.End -> @"\z"
| Anchor.EndOfLine -> @"$"
| Anchor.Boundary -> @"\b"
| Anchor.NotBoundary -> @"\B"
| Anchor.AfterMatch -> @"\G"
| :? CharSet.CharSet as charSet ->
$"[{charsetContent charSet}]"
| :? CharSet.Negated as negated ->
let (CharSet.Negated charSet) = negated
$"[^{charsetContent charSet}]"
| :? CharClass.CharClass as charClass ->
match charClass with
| CharClass.InUnicodeBlock block -> $@"\p{{{block}}}"
| CharClass.LetterOrDigit -> @"\w"
| CharClass.WhitespaceChar -> @"\s"
| CharClass.Digit -> @"\d"
| :? CharClass.Negated as negated ->
let (CharClass.Negated charClass) = negated
match charClass with
| CharClass.InUnicodeBlock block -> $@"\P{{{block}}}"
| CharClass.LetterOrDigit -> @"\W"
| CharClass.WhitespaceChar -> @"\S"
| CharClass.Digit -> @"\D"
| :? Quantity.Quantified as quantified ->
match quantified with
| Quantity.Greedy (regex,quantity) ->
let regex = regex |> groupable |> evaluate
match quantity with
| Quantity.Exactly amount ->
$@"{regex}{{{amount}}}"
| Quantity.AtLeast amount ->
if amount = 0 then
$@"{regex}*"
else if amount = 1 then
$@"{regex}+"
else
$@"{regex}{{{amount},}}"
| Quantity.Between (min,max) ->
if min = 0 && max = 1 then
$@"{regex}?"
else
$@"{regex}{{{min},{max}}}"
| Quantity.Lazy (regex,quantity) ->
let greedQuantified = Quantity.Greedy(regex,quantity) |> evaluate
greedQuantified + "?"
| :? Condition.Conditional as conditional ->
let evaluateCondition = function
| Condition.Regex regex -> regex |> evaluate
| Condition.Reference reference -> reference |> referenceString
$@"(?({conditional.If |> evaluateCondition}){conditional.Then |> evaluate}|{conditional.Else |> evaluate})"
| _ -> failwith "Not supported"
module Examples
open Krow.Regex
let bounded (regex:IRegex) =
Anchor.Start + regex + Anchor.End
let lineBounded (regex:IRegex) =
Anchor.StartOfLine + regex + Anchor.EndOfLine
let separatedList separator (regex:IRegex) =
regex + (separator + regex) * (0,())
module Guid =
let hexDigit = CharSet.Range('0', '9') / CharSet.Range('a', 'f')
let guid =
Regex.Sequence [
hexDigit * 8 + "-"
hexDigit * 4 + "-"
CharSet.Range('1', '5') + "-"
CharSet.OneOf ['8';'9';'a';'b']
hexDigit * 3 + "-"
hexDigit * 12
]
module Email =
let allowedSpecialChars = CharSet.OneOf [
'!';'#';'$';'%';'&';''';'*';'+';'/';'=';'?';'^';'_';'`';'{';'|';'}';'~';'-'
]
let alphaNumeric = CharSet.Range('a','z') / CharSet.Range('0','9')
let alphaNumericOrHyphen = alphaNumeric / "-"
module Hex =
let group1 =
["01";"08";"0B";"0C";"0E";"1F";"21";"23";"5B";"5D";"7F"]
|> List.map (fun a -> SpecialChar.Hexadecimal a :> IRegex)
|> Regex.OneOf
let group2 =
["01";"09";"0B";"0C";"0E";"7F"]
|> List.map (fun a -> SpecialChar.Hexadecimal a :> IRegex)
|> Regex.OneOf
let part = group1 / ( @"\" + group2)
let name = part * (0,())
let quotedName = "\"" + name + "\""
module User =
let stringPart = (alphaNumeric / allowedSpecialChars) * (1,())
let stringName = separatedList "." stringPart
let name = stringName / Hex.quotedName
module Ip =
let ipPart =
Regex.OneOf [
"25" + CharSet.Range('0','5')
"2" + CharSet.Range('0','4') + CharSet.Range('0','9')
CharSet.OneOf ['0';'1'] + CharSet.Range('0','9') + CharSet.Range('0','9')
]
let hexPart =
alphaNumericOrHyphen * (0,()) + alphaNumeric + ":" + Hex.name
let lastPart = ipPart / hexPart
let address = "[" + (ipPart + ".") * 3 + lastPart + "]"
module Domain =
let part = (alphaNumeric * (1,())) |> separatedList "-"
let name = part |> separatedList "."
let email = bounded (User.name + "@" + (Ip.address / Domain.name))
[<AutoOpen>]
module Krow.Regex.Operators
open Regex
type RegexSequence = RegexSequence with
static member (?<-) (RegexSequence, first:Sequence, second:Sequence) =
let (Sequence list1) = first
let (Sequence list2) = second
Sequence(list1 @ list2)
static member (?<-) (RegexSequence, first:IRegex, second:Sequence) =
(?<-) RegexSequence (Sequence [first]) second
static member (?<-) (RegexSequence, first:Sequence, second:IRegex) =
(?<-) RegexSequence first (Sequence [second])
static member (?<-) (RegexSequence, first:IRegex, second:IRegex) =
(?<-) RegexSequence (Sequence [first]) (Sequence [second])
static member (?<-) (RegexSequence, first:string, second:IRegex) =
(?<-) RegexSequence (Sequence [Literal first]) (Sequence [second])
static member (?<-) (RegexSequence, first:IRegex, second:string) =
(?<-) RegexSequence (Sequence [first]) (Sequence [Literal second])
static member inline (?<-) (RegexSequence, first, second) =
first + second
let inline (+) first second : 'R = ( (?<-) RegexSequence first second)
type RegexOneOf = RegexOneOf with
static member (?<-) (RegexOneOf, first:OneOf, second:OneOf) =
let (OneOf list1) = first
let (OneOf list2) = second
OneOf(list1 @ list2)
static member (?<-) (RegexOneOf, first:IRegex, second:OneOf) =
(?<-) RegexOneOf (OneOf [first]) second
static member (?<-) (RegexOneOf, first:OneOf, second:IRegex) =
(?<-) RegexOneOf first (OneOf [second])
static member (?<-) (RegexOneOf, first:IRegex, second:IRegex) =
OneOf [first;second]
static member (?<-) (RegexOneOf, first:CharSet.CharSet, second:CharSet.CharSet) =
match first,second with
| CharSet.Multiple charsets1, CharSet.Multiple charsets2 ->
CharSet.Multiple (charsets1 @ charsets2)
| CharSet.Multiple charsets1, charset2 ->
CharSet.Multiple (charsets1 @ [charset2])
| charset1, CharSet.Multiple charsets2 ->
CharSet.Multiple (charset1 :: charsets2)
| charset1, charset2 ->
CharSet.Multiple [charset1;charset2]
static member (?<-) (RegexOneOf, CharSet.Negated first, CharSet.Negated second) =
CharSet.Negated ((?<-) RegexOneOf first second)
static member (?<-) (RegexOneOf, first:string, second:IRegex) =
(?<-) RegexOneOf (OneOf [Literal first]) (OneOf [second])
static member (?<-) (RegexOneOf, first:IRegex, second:string) =
(?<-) RegexOneOf (OneOf [first]) (OneOf [Literal second])
static member inline (?<-) (RegexOneOf, first, second) =
first / second
let inline (/) first second : 'R = ( (?<-) RegexOneOf first second)
type RegexQuantification = RegexQuantification with
static member (?<-) (RegexQuantification, regex, quantity) =
Quantity.Greedy(regex, Quantity.Exactly quantity)
static member (?<-) (RegexQuantification, regex, quantity) =
Quantity.Greedy(regex, Quantity.Between quantity)
static member (?<-) (RegexQuantification, regex, quantity) =
let quantity, () = quantity
Quantity.Greedy(regex, Quantity.AtLeast quantity)
static member inline (?<-) (RegexQuantification, first, second) =
first * second
let inline ( * ) first second : 'R = ( (?<-) RegexQuantification first second)
type RegexLazyQuantification = RegexLazyQuantification with
static member (?<-) (RegexLazyQuantification, regex, quantity) =
Quantity.Lazy(regex, Quantity.Exactly quantity)
static member (?<-) (RegexLazyQuantification, regex, quantity) =
Quantity.Lazy(regex, Quantity.Between quantity)
static member (?<-) (RegexLazyQuantification, regex, quantity) =
let quantity, () = quantity
Quantity.Lazy(regex, Quantity.AtLeast quantity)
static member inline (?<-) (RegexLazyQuantification, first, second) =
first *? second
let inline ( *? ) first second : 'R = ( (?<-) RegexLazyQuantification first second)
type RegexNegation = RegexNegation with
static member (?<-) (RegexNegation, charClass:CharClass.CharClass, _) =
CharClass.Negated charClass
static member (?<-) (RegexNegation, charClass:CharClass.Negated, _) =
let (CharClass.Negated charClass) = charClass
charClass
static member (?<-) (RegexNegation, charClass:CharSet.CharSet, _) =
CharSet.Negated charClass
static member (?<-) (RegexNegation, charClass:CharSet.Negated, _) =
let (CharSet.Negated charClass) = charClass
charClass
static member (?<-) (RegexNegation, look:Look.Look, _) =
Look.Negated look
static member (?<-) (RegexNegation, look:Look.Negated, _) =
let (Look.Negated look) = look
look
static member inline (?<-) (RegexNegation, first, _) =
!first
let inline (!) first : 'R = ( (?<-) RegexNegation first ())
let aaa = !(Look.Ahead (Literal "aa"))
[<AutoOpen>]
module Krow.Regex.Types
type IRegex = interface end
module Regex =
type Literal =
| Literal of string
interface IRegex
type Raw =
| Raw of string
interface IRegex
type OneOf =
| OneOf of IRegex list
interface IRegex
type Sequence =
| Sequence of IRegex list interface IRegex
type internal NonCapturing =
| NonCapturing of IRegex
interface IRegex
module Mode =
type Mode =
| CaseInsensitive
| Multiline
| ExplicitCapture
| IgnoreUnescapedWhiteSpace
type WithModes =
| WithModes of Mode list * IRegex
interface IRegex
module Look =
type Look =
| Ahead of IRegex
| Behind of IRegex
interface IRegex
type Negated =
|Negated of Look interface IRegex
module Group =
type Reference =
| Positional of int
| Named of string
interface IRegex
type Kind =
// Capturing
| Capturing
| CapturingWithName of string
// Non capturing
| NonBacktrackingGrouping
// Balancing
| UnCapturing of Reference // Balancing while omitting first arg
| Balancing of string * Reference
type Group =
| Group of Kind * IRegex
interface IRegex
module SpecialChar =
type SpecialChar =
| WildCard
| Bell
| Backspace
| Tab
| VerticalTab
| CarriageReturn
| NewLine
| Escaped
| Octal of string
| Hexadecimal of string
| ASCII of string
interface IRegex
module Anchor =
type Anchor =
| Start
| StartOfLine
| End
| EndOfLine
| Boundary
| NotBoundary
| AfterMatch
interface IRegex
module CharSet =
type CharSet =
| OneOf of char list
| Range of char * char
| Multiple of CharSet list
interface IRegex
type Negated =
| Negated of CharSet
interface IRegex
module CharClass =
type CharClass =
| InUnicodeBlock of string
| LetterOrDigit
| WhitespaceChar
| Digit
interface IRegex
type Negated =
| Negated of CharClass
interface IRegex
module Quantity =
type Quantity =
| Exactly of int
| AtLeast of int
| Between of int * int
type Quantified =
| Greedy of IRegex * Quantity
| Lazy of IRegex * Quantity
interface IRegex
module Condition =
type Condition =
| Regex of IRegex
| Reference of Group.Reference
type Conditional =
{ If: Condition; Then: IRegex; Else: IRegex }
interface IRegex
@amieres
Copy link

amieres commented Feb 11, 2021

Guid:

let hexDigit    = InRange('0', '9') / InRange('a', 'f')
let hexDigits n = Exactly(uint32 n, hexDigit)

Sequence [
    hexDigits  8      + "-"
    hexDigits  4      + "-"
    InRange('1', '5')
    hexDigits  3      + "-"
    oneOf     "89ab"
    hexDigits  3      + "-"
    hexDigits 12
]
|> evaluate
|> printfn "%s"

//  (?:[0-9]|[a-f]){8}-(?:[0-9]|[a-f]){4}-[1-5](?:[0-9]|[a-f]){3}-[89ab](?:[0-9]|[a-f]){3}-(?:[0-9]|[a-f]){12}

@amieres
Copy link

amieres commented Feb 11, 2021

email:

let allowed  = NotOneOfEscaped (escape "<>()[].,;:@" + evaluate WhitespaceChar) |> MoreThanOnce
let listSep sep elems = elems + ManyTimesOrNone (Literal sep + elems)

listSep "." allowed + "@" + listSep "." allowed
|> evaluate
|> printfn "%s"

in this case allowed needed to include \s without further escaping. For this case I added:

...
| LiteralRegex  of string
| OneOfEscaped  of string
| NotOneOfEscaped of string
...
| LiteralRegex rx -> rx
| OneOfEscaped  string -> sprintf @"[%s]" string
| NotOneOfEscaped   string -> sprintf @"[^%s]" string
...

to allow for cases not contemplated or for composing with regex from other sources.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment