Skip to content

Instantly share code, notes, and snippets.

@AmadorMunozBerzosa
Last active June 19, 2021 16:36
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AmadorMunozBerzosa/3288771aaa199cc73acc7781bff4d49e to your computer and use it in GitHub Desktop.
Save AmadorMunozBerzosa/3288771aaa199cc73acc7781bff4d49e to your computer and use it in GitHub Desktop.
DSL wrapper for working with regular expressions in a readable manner in F#. Gist messes with the file ordering, but the correct file order is: Types > Evaluation > Operators > ActivePatterns > Examples
module Krow.Regex.ActivePatterns
open System.Text
let (|Regex|_|) (pattern:IRegex) input =
if input = null then
None
else
try
let match' = RegularExpressions.Regex.Match(input, pattern |> Regex.evaluate)
if match'.Success then
Some( List.tail [ for groups in match'.Groups -> groups.Value ])
else
None
with _ -> None
let (|Regexs|) (pattern:IRegex) input =
if input = null then [] else
try
let matches = RegularExpressions.Regex.Matches(input, pattern |> Regex.evaluate)
[ for match' in matches do (List.tail [ for group in match'.Groups -> group.Value ]) ]
with e -> []
[<AutoOpen>]
module Krow.Regex.Evaluation
open Krow.Regex.Types
open System.Text
module Regex =
let escape s = (RegularExpressions.Regex.Escape s).Replace("]", "\]")
let unescape (s:string) = (RegularExpressions.Regex.Unescape (s.Replace("\]", "]")))
[<AutoOpen>]
module private Helpers =
let groupable (regex:IRegex) =
match regex with
| :? Regex.Sequence | :? Regex.OneOf -> Regex.NonCapturing regex :> IRegex
| _ -> regex
let listGroupable (regex:IRegex) =
match regex with
| :? Regex.OneOf -> Regex.NonCapturing regex :> IRegex
| _ -> regex
let rec charsetContent charSet =
match charSet with
| CharSet.OneOf chars ->
let string = (new string(chars |> List.toArray)).Replace("/",@"\/")
$@"{string}"
| CharSet.Range (first,last) ->
$@"{first}-{last}"
| CharSet.Multiple charSets ->
charSets |> List.map charsetContent |> List.reduce (+)
let referenceString reference =
match reference with
| Group.Positional number -> number |> string
| Group.Named string -> string
|> escape
let rec evaluate (regex:IRegex) =
match regex with
| :? Regex.Literal as literal ->
let (Regex.Literal literal) = literal
escape literal
| :? Regex.Raw as literal ->
let (Regex.Raw literal) = literal
literal
| :? Regex.OneOf as oneOf ->
let (Regex.OneOf regexs) = oneOf
regexs
|> List.map evaluate |> String.concat "|"
| :? Regex.Sequence as sequence ->
let (Regex.Sequence regexs) = sequence
regexs |> List.map (listGroupable >> evaluate) |> String.concat ""
| :? Regex.NonCapturing as grouping ->
let (Regex.NonCapturing regex) = grouping
$@"(?:{evaluate regex})"
| :? Mode.WithModes as withModes ->
let (Mode.WithModes (modes, regex)) = withModes
let modeChar = function
| Mode.CaseInsensitive -> "i"
| Mode.Multiline -> "m"
| Mode.ExplicitCapture -> "n"
| Mode.IgnoreUnescapedWhiteSpace -> "x"
let modeList = modes |> List.map modeChar |> String.concat ""
$@"(?{modeList}:{regex})"
| :? Group.Reference as reference ->
let string = reference |> referenceString
match reference with
| Group.Positional _ -> $@"\{string}"
| Group.Named _ -> $@"\k<{string}>"
| :? Look.Look as look ->
match look with
| Look.Ahead regex -> $@"(?={regex |> evaluate})"
| Look.Behind regex -> $@"(?<={regex |> evaluate})"
| :? Look.Negated as look ->
let (Look.Negated look) = look
match look with
| Look.Ahead regex -> $@"(?!{regex |> evaluate})"
| Look.Behind regex -> $@"(?<!{regex |> evaluate})"
| :? Group.Group as group ->
let (Group.Group(group,regex)) = group
let regex = regex |> evaluate
match group with
// Capturing
| Group.Capturing -> $@"({regex})"
| Group.CapturingWithName name -> $@"(?<{name}>{regex})"
// Non capturing
| Group.NonBacktrackingGrouping -> $@"(?>{regex})"
// Balancing
| Group.UnCapturing reference ->
$@"(?<-{reference |> referenceString}>{regex})"
| Group.Balancing (newName, reference) ->
$@"(?<{newName |> escape}-{reference |> referenceString}>{regex})"
| :? SpecialChar.SpecialChar as special ->
match special with
| SpecialChar.WildCard -> @"."
| SpecialChar.Bell -> @"\a"
| SpecialChar.Backspace -> @"\b"
| SpecialChar.Tab -> @"\t"
| SpecialChar.VerticalTab -> @"\v"
| SpecialChar.CarriageReturn -> @"\r"
| SpecialChar.NewLine -> @"\n"
| SpecialChar.Escaped -> @"\e"
| SpecialChar.Octal oct -> $@"\{oct}"
| SpecialChar.Hexadecimal hex -> $@"\x{hex}"
| SpecialChar.ASCII ascii -> $@"\u{ascii}"
| :? Anchor.Anchor as anchor ->
match anchor with
| Anchor.Start -> @"\A"
| Anchor.StartOfLine -> @"^"
| Anchor.End -> @"\z"
| Anchor.EndOfLine -> @"$"
| Anchor.Boundary -> @"\b"
| Anchor.NotBoundary -> @"\B"
| Anchor.AfterMatch -> @"\G"
| :? CharSet.CharSet as charSet ->
$"[{charsetContent charSet}]"
| :? CharSet.Negated as negated ->
let (CharSet.Negated charSet) = negated
$"[^{charsetContent charSet}]"
| :? CharClass.CharClass as charClass ->
match charClass with
| CharClass.InUnicodeBlock block -> $@"\p{{{block}}}"
| CharClass.LetterOrDigit -> @"\w"
| CharClass.WhitespaceChar -> @"\s"
| CharClass.Digit -> @"\d"
| :? CharClass.Negated as negated ->
let (CharClass.Negated charClass) = negated
match charClass with
| CharClass.InUnicodeBlock block -> $@"\P{{{block}}}"
| CharClass.LetterOrDigit -> @"\W"
| CharClass.WhitespaceChar -> @"\S"
| CharClass.Digit -> @"\D"
| :? Quantity.Quantified as quantified ->
match quantified with
| Quantity.Greedy (regex,quantity) ->
let regex = regex |> groupable |> evaluate
match quantity with
| Quantity.Exactly amount ->
$@"{regex}{{{amount}}}"
| Quantity.AtLeast amount ->
if amount = 0 then
$@"{regex}*"
else if amount = 1 then
$@"{regex}+"
else
$@"{regex}{{{amount},}}"
| Quantity.Between (min,max) ->
if min = 0 && max = 1 then
$@"{regex}?"
else
$@"{regex}{{{min},{max}}}"
| Quantity.Lazy (regex,quantity) ->
let greedQuantified = Quantity.Greedy(regex,quantity) |> evaluate
greedQuantified + "?"
| :? Condition.Conditional as conditional ->
let evaluateCondition = function
| Condition.Regex regex -> regex |> evaluate
| Condition.Reference reference -> reference |> referenceString
$@"(?({conditional.If |> evaluateCondition}){conditional.Then |> evaluate}|{conditional.Else |> evaluate})"
| _ -> failwith "Not supported"
module Examples
open Krow.Regex
let bounded (regex:IRegex) =
Anchor.Start + regex + Anchor.End
let lineBounded (regex:IRegex) =
Anchor.StartOfLine + regex + Anchor.EndOfLine
let separatedList separator (regex:IRegex) =
regex + (separator + regex) * (0,())
module Guid =
let hexDigit = CharSet.Range('0', '9') / CharSet.Range('a', 'f')
let guid =
Regex.Sequence [
hexDigit * 8 + "-"
hexDigit * 4 + "-"
CharSet.Range('1', '5') + "-"
CharSet.OneOf ['8';'9';'a';'b']
hexDigit * 3 + "-"
hexDigit * 12
]
module Email =
let allowedSpecialChars = CharSet.OneOf [
'!';'#';'$';'%';'&';''';'*';'+';'/';'=';'?';'^';'_';'`';'{';'|';'}';'~';'-'
]
let alphaNumeric = CharSet.Range('a','z') / CharSet.Range('0','9')
let alphaNumericOrHyphen = alphaNumeric / "-"
module Hex =
let group1 =
["01";"08";"0B";"0C";"0E";"1F";"21";"23";"5B";"5D";"7F"]
|> List.map (fun a -> SpecialChar.Hexadecimal a :> IRegex)
|> Regex.OneOf
let group2 =
["01";"09";"0B";"0C";"0E";"7F"]
|> List.map (fun a -> SpecialChar.Hexadecimal a :> IRegex)
|> Regex.OneOf
let part = group1 / ( @"\" + group2)
let name = part * (0,())
let quotedName = "\"" + name + "\""
module User =
let stringPart = (alphaNumeric / allowedSpecialChars) * (1,())
let stringName = separatedList "." stringPart
let name = stringName / Hex.quotedName
module Ip =
let ipPart =
Regex.OneOf [
"25" + CharSet.Range('0','5')
"2" + CharSet.Range('0','4') + CharSet.Range('0','9')
CharSet.OneOf ['0';'1'] + CharSet.Range('0','9') + CharSet.Range('0','9')
]
let hexPart =
alphaNumericOrHyphen * (0,()) + alphaNumeric + ":" + Hex.name
let lastPart = ipPart / hexPart
let address = "[" + (ipPart + ".") * 3 + lastPart + "]"
module Domain =
let part = (alphaNumeric * (1,())) |> separatedList "-"
let name = part |> separatedList "."
let email = bounded (User.name + "@" + (Ip.address / Domain.name))
[<AutoOpen>]
module Krow.Regex.Operators
open Regex
type RegexSequence = RegexSequence with
static member (?<-) (RegexSequence, first:Sequence, second:Sequence) =
let (Sequence list1) = first
let (Sequence list2) = second
Sequence(list1 @ list2)
static member (?<-) (RegexSequence, first:IRegex, second:Sequence) =
(?<-) RegexSequence (Sequence [first]) second
static member (?<-) (RegexSequence, first:Sequence, second:IRegex) =
(?<-) RegexSequence first (Sequence [second])
static member (?<-) (RegexSequence, first:IRegex, second:IRegex) =
(?<-) RegexSequence (Sequence [first]) (Sequence [second])
static member (?<-) (RegexSequence, first:string, second:IRegex) =
(?<-) RegexSequence (Sequence [Literal first]) (Sequence [second])
static member (?<-) (RegexSequence, first:IRegex, second:string) =
(?<-) RegexSequence (Sequence [first]) (Sequence [Literal second])
static member inline (?<-) (RegexSequence, first, second) =
first + second
let inline (+) first second : 'R = ( (?<-) RegexSequence first second)
type RegexOneOf = RegexOneOf with
static member (?<-) (RegexOneOf, first:OneOf, second:OneOf) =
let (OneOf list1) = first
let (OneOf list2) = second
OneOf(list1 @ list2)
static member (?<-) (RegexOneOf, first:IRegex, second:OneOf) =
(?<-) RegexOneOf (OneOf [first]) second
static member (?<-) (RegexOneOf, first:OneOf, second:IRegex) =
(?<-) RegexOneOf first (OneOf [second])
static member (?<-) (RegexOneOf, first:IRegex, second:IRegex) =
OneOf [first;second]
static member (?<-) (RegexOneOf, first:CharSet.CharSet, second:CharSet.CharSet) =
match first,second with
| CharSet.Multiple charsets1, CharSet.Multiple charsets2 ->
CharSet.Multiple (charsets1 @ charsets2)
| CharSet.Multiple charsets1, charset2 ->
CharSet.Multiple (charsets1 @ [charset2])
| charset1, CharSet.Multiple charsets2 ->
CharSet.Multiple (charset1 :: charsets2)
| charset1, charset2 ->
CharSet.Multiple [charset1;charset2]
static member (?<-) (RegexOneOf, CharSet.Negated first, CharSet.Negated second) =
CharSet.Negated ((?<-) RegexOneOf first second)
static member (?<-) (RegexOneOf, first:string, second:IRegex) =
(?<-) RegexOneOf (OneOf [Literal first]) (OneOf [second])
static member (?<-) (RegexOneOf, first:IRegex, second:string) =
(?<-) RegexOneOf (OneOf [first]) (OneOf [Literal second])
static member inline (?<-) (RegexOneOf, first, second) =
first / second
let inline (/) first second : 'R = ( (?<-) RegexOneOf first second)
type RegexQuantification = RegexQuantification with
static member (?<-) (RegexQuantification, regex, quantity) =
Quantity.Greedy(regex, Quantity.Exactly quantity)
static member (?<-) (RegexQuantification, regex, quantity) =
Quantity.Greedy(regex, Quantity.Between quantity)
static member (?<-) (RegexQuantification, regex, quantity) =
let quantity, () = quantity
Quantity.Greedy(regex, Quantity.AtLeast quantity)
static member inline (?<-) (RegexQuantification, first, second) =
first * second
let inline ( * ) first second : 'R = ( (?<-) RegexQuantification first second)
type RegexLazyQuantification = RegexLazyQuantification with
static member (?<-) (RegexLazyQuantification, regex, quantity) =
Quantity.Lazy(regex, Quantity.Exactly quantity)
static member (?<-) (RegexLazyQuantification, regex, quantity) =
Quantity.Lazy(regex, Quantity.Between quantity)
static member (?<-) (RegexLazyQuantification, regex, quantity) =
let quantity, () = quantity
Quantity.Lazy(regex, Quantity.AtLeast quantity)
static member inline (?<-) (RegexLazyQuantification, first, second) =
first *? second
let inline ( *? ) first second : 'R = ( (?<-) RegexLazyQuantification first second)
type RegexNegation = RegexNegation with
static member (?<-) (RegexNegation, charClass:CharClass.CharClass, _) =
CharClass.Negated charClass
static member (?<-) (RegexNegation, charClass:CharClass.Negated, _) =
let (CharClass.Negated charClass) = charClass
charClass
static member (?<-) (RegexNegation, charClass:CharSet.CharSet, _) =
CharSet.Negated charClass
static member (?<-) (RegexNegation, charClass:CharSet.Negated, _) =
let (CharSet.Negated charClass) = charClass
charClass
static member (?<-) (RegexNegation, look:Look.Look, _) =
Look.Negated look
static member (?<-) (RegexNegation, look:Look.Negated, _) =
let (Look.Negated look) = look
look
static member inline (?<-) (RegexNegation, first, _) =
!first
let inline (!) first : 'R = ( (?<-) RegexNegation first ())
let aaa = !(Look.Ahead (Literal "aa"))
[<AutoOpen>]
module Krow.Regex.Types
type IRegex = interface end
module Regex =
type Literal =
| Literal of string
interface IRegex
type Raw =
| Raw of string
interface IRegex
type OneOf =
| OneOf of IRegex list
interface IRegex
type Sequence =
| Sequence of IRegex list interface IRegex
type internal NonCapturing =
| NonCapturing of IRegex
interface IRegex
module Mode =
type Mode =
| CaseInsensitive
| Multiline
| ExplicitCapture
| IgnoreUnescapedWhiteSpace
type WithModes =
| WithModes of Mode list * IRegex
interface IRegex
module Look =
type Look =
| Ahead of IRegex
| Behind of IRegex
interface IRegex
type Negated =
|Negated of Look interface IRegex
module Group =
type Reference =
| Positional of int
| Named of string
interface IRegex
type Kind =
// Capturing
| Capturing
| CapturingWithName of string
// Non capturing
| NonBacktrackingGrouping
// Balancing
| UnCapturing of Reference // Balancing while omitting first arg
| Balancing of string * Reference
type Group =
| Group of Kind * IRegex
interface IRegex
module SpecialChar =
type SpecialChar =
| WildCard
| Bell
| Backspace
| Tab
| VerticalTab
| CarriageReturn
| NewLine
| Escaped
| Octal of string
| Hexadecimal of string
| ASCII of string
interface IRegex
module Anchor =
type Anchor =
| Start
| StartOfLine
| End
| EndOfLine
| Boundary
| NotBoundary
| AfterMatch
interface IRegex
module CharSet =
type CharSet =
| OneOf of char list
| Range of char * char
| Multiple of CharSet list
interface IRegex
type Negated =
| Negated of CharSet
interface IRegex
module CharClass =
type CharClass =
| InUnicodeBlock of string
| LetterOrDigit
| WhitespaceChar
| Digit
interface IRegex
type Negated =
| Negated of CharClass
interface IRegex
module Quantity =
type Quantity =
| Exactly of int
| AtLeast of int
| Between of int * int
type Quantified =
| Greedy of IRegex * Quantity
| Lazy of IRegex * Quantity
interface IRegex
module Condition =
type Condition =
| Regex of IRegex
| Reference of Group.Reference
type Conditional =
{ If: Condition; Then: IRegex; Else: IRegex }
interface IRegex
@amieres
Copy link

amieres commented Feb 11, 2021

Here is a version of the evaluator the uses sprintf for those in prior versions of F#:

    let escape s = (RegularExpressions.Regex.Escape s).Replace("]", "\]")    // changed

    let rec evaluate = function
        | Literal string  -> escape string
        | LiteralRegex rx -> rx   // added

        | AnyChar         -> @"."
        | Bell            -> @"\a"
        | Backspace       -> @"\b"
        | Tab             -> @"\t"
        | VerticalTab     -> @"\v"
        | CarriageReturn  -> @"\r"
        | NewLine         -> @"\n"
        | Escaped         -> @"\e"
        | Octal oct       -> sprintf @"\%s"  (string oct  )
        | Hexadecimal hex -> sprintf @"\x%s" (string hex  )
        | ASCII ascii     -> sprintf @"\u%s" (string ascii)

        // Anchors
        | Start         -> @"\A"
        | StartOfLine   -> @"^"
        | End           -> @"\z"
        | EndOfLine     -> @"$"
        | Boundary      -> @"\b"
        | NotBoundary   -> @"\B"
        | AfterMatch    -> @"\G"

        // CHAR CLASSES
        | OneOf chars          ->
                                  let string = new string(chars |> List.toArray) |> escape
                                  sprintf @"[%s]" string
        | OneOfEscaped  string -> sprintf @"[%s]" string   // added
        | InRange (first,last) -> sprintf @"[%s-%s]" (string first |> escape) (string last |> escape)
        | InUnicodeBlock block -> sprintf @"\p{{%s}}" (block)
        | LetterOrDigit        -> @"\w"
        | WhitespaceChar       -> @"\s"
        | Digit                -> @"\d"

        | NotOneOf chars           ->
                                      let string = new string(chars |> List.toArray) |> escape
                                      sprintf @"[^%s]" (string)
        | NotOneOfEscaped   string -> sprintf @"[^%s]" string   // added
        | NotInRange (first, last) -> sprintf @"[^%s-%s]" (string first |> escape) (string last |> escape)
        | NotInUnicodeBlock block  -> sprintf @"\P{{%s}}" (block)
        | NotLetterOrDigit         -> @"\W"
        | NotWhitespaceChar        -> @"\S"
        | NotDigit                 -> @"\D"

        // Capturing
        | Capturing                regex  -> sprintf @"(%s)" (regex |> evaluate)
        | CapturingWithName (name, regex) -> sprintf @"(?<%s>%s)" (name) (regex |> evaluate)
        | ReferenceTo reference ->
            match reference with
            | Positional number -> sprintf @"\%s" (number |> string |> escape)
            | Named string -> sprintf @"\k<%s>" (string |> escape)
        // Non capturing
        | Grouping regex -> sprintf @"(?:%s)" (regex |> evaluate)
        | NonBacktrackingGrouping regex -> sprintf @"(?>%s)" (regex |> evaluate)
        // Options
        | WithOptions (modes, regex) ->
            let modeList = modes |> List.map (modeChar>>string) |> String.concat ""
            sprintf @"(?%s:%s)" (modeList) (regex |> evaluate)
        // Lookaround
        | LookingAheadFor regex -> sprintf @"(?=%s)" (regex |> evaluate)
        | LookingAheadAgainst regex -> sprintf @"(?!%s)" (regex |> evaluate)
        | LookingBehindFor regex -> sprintf @"(?<=%s)" (regex |> evaluate)
        | NotLookingBehindAgainst regex -> sprintf @"(?<!%s)" (regex |> evaluate)

        // Balancing
        | UnCapturing (reference,regex) ->
            sprintf @"(?<-%s>%s)" (reference |>referenceString) (regex |> evaluate)
        | Balancing (newName, reference, regex) ->
            sprintf @"(?<%s-%s>%s)" (newName |> escape) (reference |>referenceString) (regex |> evaluate)

        // Combination
        | Either regexs -> regexs |> List.map evaluate |> String.concat "|"
        | Sequence regexs -> regexs |> List.map evaluate |> String.concat ""

        // Ternaries
        | If (condition,yes,no) ->
            sprintf @"(?(%s)%s|%s)" (condition |> evaluate) (yes |> evaluate) (no |> evaluate)
        | IfGroup (reference,yes,no) ->
            sprintf @"(?(%s)%s|%s)" (reference |> referenceString) (yes |> evaluate) (no |> evaluate)

        // QUANTIFICATION
        | OneTimeOrNone          regex  -> sprintf @"%s?"          (regex |> quantifiable |> evaluate)
        | MoreThanOnce           regex  -> sprintf @"%s+"          (regex |> quantifiable |> evaluate)
        | ManyTimesOrNone        regex  -> sprintf @"%s*"          (regex |> quantifiable |> evaluate)
        | Exactly       (amount ,regex) -> sprintf @"%s{{%s}}"     (regex |> quantifiable |> evaluate) (string amount)
        | AtLeast       (amount ,regex) -> sprintf @"%s{{%s,}}"    (regex |> quantifiable |> evaluate) (string amount)
        | Between       (min,max,regex) -> sprintf @"%s{{%s,%s}}"  (regex |> quantifiable |> evaluate) (string min) (string max)

        | OneTimeOrNoneLazily    regex  -> sprintf @"%s??"         (regex |> quantifiable |> evaluate)
        | MoreThanOnceLazily     regex  -> sprintf @"%s+?"         (regex |> quantifiable |> evaluate)
        | ManyTimesOrNoneLazily  regex  -> sprintf @"%s*?"         (regex |> quantifiable |> evaluate)
        | ExactlyLazily (amount, regex) -> sprintf @"%s{{%s}}?"    (regex |> quantifiable |> evaluate) (string amount)
        | AtLeastLazily (amount, regex) -> sprintf @"%s{{%s,}}?"   (regex |> quantifiable |> evaluate) (string amount)
        | BetweenLazily (min,max,regex) -> sprintf @"%s{{%s,%s}}?" (regex |> quantifiable |> evaluate) (string min) (string max)

@amieres
Copy link

amieres commented Feb 11, 2021

It is working very well.
Except for ||| that doesn't seem to overload like + does.

  • Literal "this" ||| Literal "that" works
  • Literal "this" ||| "that" doesn't work
    I'm not sure why

So I changed to '/'. I know is not symmetrical and it doesn't seem commutative. OTOH it confers the meaning well.
For instance: Literal "Err" / "Warn" / "Info" is very readable.

Also RegularExpressions.Regex.Escape doesn't escape the character ] which is a problem when doing OneOf, so I changed it to:

    let escape s = (RegularExpressions.Regex.Escape s).Replace("]", "\]")

Here is an example:

let parseErrWarnInfo = """
Err (1, 7) - (1, 12): "This shows over there as an error".
Warn (2, 7) - (2, 12): "This shows over there as a warning".
Info (3, 7) - (3, 12): "This shows over there as information".
"""

let digitsCap  = MoreThanOnce Digit |> Capturing
let spaces     = ManyTimesOrNone WhitespaceChar 
let coords     = "(" + digitsCap + "," + spaces + digitsCap + ")"
let notOneOf v = Seq.toList v |> NotOneOf 

let errWarnInfo =
    Sequence [
        Capturing(Literal "Err" / "Warn" / "Info")             + " "
        coords                                                 + " - "
        coords                                                 + ": "
        "\"" + Capturing (notOneOf "\"" |> MoreThanOnceLazily) + "\"."
    ]
    |> evaluate

printfn "Regex: %s" errWarnInfo  

// (Err|Warn|Info)\ \((\d+),\s*(\d+)\)\ -\ \((\d+),\s*(\d+)\):\ "([^"]+?)"\.

I also changed the operator for Either so it preserves the original order. I doesn't make a difference to the regex, but the rearranging was a little disconcerting.

@amieres
Copy link

amieres commented Feb 11, 2021

With a couple of active patterns:

    let (|Regex|_|) pattern input =
        if input = null then None else
        try 
            let m = RegularExpressions.Regex.Match(input, pattern)
            if m.Success then Some(List.tail [ for g in m.Groups -> g.Value ])
            else None
        with e -> None

    let (|Regexs|) pattern input =
        if input = null then [| |] else
        try 
            let ms = RegularExpressions.Regex.Matches(input, pattern)
            [| for m in ms do yield (List.tail [ for g in m.Groups -> g.Value ]) |] 
        with e -> [| |]

then it can be used very nicely like this:

match parseErrWarnInfo with Regexs errWarnInfo r -> printfn "Matches found:\n%A" r

match parseErrWarnInfo with
| Regex errWarnInfo r -> printfn "First Match: %A" r
| _ -> printfn "No match found"

match parseErrWarnInfo with Regexs errWarnInfo r -> printfn "Matches found:\n%A" r

//  Matches found:
//  [|["Err"; "1"; "7"; "1"; "12"; "This shows over there as an error"];
//    ["Warn"; "2"; "7"; "2"; "12"; "This shows over there as a warning"];
//    ["Info"; "3"; "7"; "3"; "12"; "This shows over there as information"]|]

match parseErrWarnInfo with
| Regex errWarnInfo r -> printfn "First Match: %A" r
| _ -> printfn "No match found"

// First Match: ["Err"; "1"; "7"; "1"; "12"; "This shows over there as an error"]

@amieres
Copy link

amieres commented Feb 11, 2021

Guid:

let hexDigit    = InRange('0', '9') / InRange('a', 'f')
let hexDigits n = Exactly(uint32 n, hexDigit)

Sequence [
    hexDigits  8      + "-"
    hexDigits  4      + "-"
    InRange('1', '5')
    hexDigits  3      + "-"
    oneOf     "89ab"
    hexDigits  3      + "-"
    hexDigits 12
]
|> evaluate
|> printfn "%s"

//  (?:[0-9]|[a-f]){8}-(?:[0-9]|[a-f]){4}-[1-5](?:[0-9]|[a-f]){3}-[89ab](?:[0-9]|[a-f]){3}-(?:[0-9]|[a-f]){12}

@amieres
Copy link

amieres commented Feb 11, 2021

email:

let allowed  = NotOneOfEscaped (escape "<>()[].,;:@" + evaluate WhitespaceChar) |> MoreThanOnce
let listSep sep elems = elems + ManyTimesOrNone (Literal sep + elems)

listSep "." allowed + "@" + listSep "." allowed
|> evaluate
|> printfn "%s"

in this case allowed needed to include \s without further escaping. For this case I added:

...
| LiteralRegex  of string
| OneOfEscaped  of string
| NotOneOfEscaped of string
...
| LiteralRegex rx -> rx
| OneOfEscaped  string -> sprintf @"[%s]" string
| NotOneOfEscaped   string -> sprintf @"[^%s]" string
...

to allow for cases not contemplated or for composing with regex from other sources.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment