Outputs:
NoLeft
Leftmost (Start (0, 4))
Leftmost (Start (1, 5))
Leftmost (Start (1, 5))
Leftmost (Start (1, 7))
#nowarn "3535" | |
// https://sebfisch.github.io/haskell-regexp/regexp-play.pdf | |
type Reg<'char, 'semiring> = | |
{ Empty : 'semiring; | |
Final : 'semiring; | |
Reg : Re<'char, 'semiring> } | |
and Re<'char, 'semiring> = | |
| Eps | |
| Sym of ('char -> 'semiring) | |
| Alt of (Reg<'char, 'semiring> * Reg<'char, 'semiring>) | |
| Seq of (Reg<'char, 'semiring> * Reg<'char, 'semiring>) | |
| Rep of (Reg<'char, 'semiring>) | |
type Semiring<'self when 'self :> Semiring<'self> and 'self : equality> = | |
interface | |
static abstract Zero : 'self | |
static abstract One : 'self | |
static abstract Add : 'self -> 'self -> 'self | |
static abstract Mul : 'self -> 'self -> 'self | |
end | |
let eps<'c, 's when 's :> Semiring<'s>> : Reg<'c, 's> = | |
{ Empty = 's.One; Final = 's.Zero; Reg = Eps } | |
let sym<'c, 's when 's :> Semiring<'s>> (f: ('c -> 's)) : Reg<'c, 's> = | |
{ Empty = 's.Zero; Final = 's.Zero; Reg = Sym f } | |
let alt<'c, 's when 's :> Semiring<'s>> (p: Reg<'c, 's>) (q: Reg<'c, 's>) = | |
{ Empty = 's.Add p.Empty q.Empty; | |
Final = 's.Add p.Final q.Final; | |
Reg = Alt (p, q) } | |
let seq<'c, 's when 's :> Semiring<'s>> (p: Reg<'c, 's>) (q: Reg<'c, 's>) : Reg<'c, 's> = | |
{ Empty = 's.Mul p.Empty q.Empty; | |
Final = 's.Add ('s.Mul p.Final q.Empty) q.Final; | |
Reg = Seq (p, q) } | |
let rep<'c, 's when 's :> Semiring<'s>> (r: Reg<'c, 's>) : Reg<'c, 's> = | |
{ Empty = 's.One; | |
Final = r.Final; | |
Reg = Rep r } | |
let rec shift<'c, 's when 's :> Semiring<'s>> (m:'s) (r:Re<'c,'s>) (c:'c) : Reg<'c,'s> = | |
match r with | |
| Eps -> eps | |
| Sym f -> let fin = 's.Mul m (f c) in { (sym f) with Final = fin } | |
| Alt (p, q) -> alt (shift m p.Reg c) (shift m q.Reg c) | |
| Seq (p, q) -> seq (shift m p.Reg c) (shift ('s.Add ('s.Mul m p.Empty) p.Final) q.Reg c) | |
| Rep r -> rep (shift ('s.Add m r.Final) r.Reg c) | |
let re_match<'c, 's when 's :> Semiring<'s>> (r: Reg<'c, 's>) (s: seq<'c>) : 's = | |
if Seq.isEmpty s then | |
r.Empty | |
else | |
(Seq.fold | |
(shift 's.Zero << _.Reg) | |
(shift 's.One r.Reg (Seq.head s)) | |
(Seq.tail s)).Final | |
module Seq = let inline enumerate<'a> (seq: 'a seq) : seq<int*'a> = System.Linq.Enumerable.Select(seq, fun it i -> i,it) | |
let re_submatch<'c, 's when 's :> Semiring<'s>> (r: Reg<int*'c, 's>) (s: 'c seq) : 's = | |
let arb = rep (sym (fun t -> 's.One)) | |
re_match (seq arb (seq r arb)) (Seq.enumerate s) | |
type Start = NoStart | Start of int*int | |
type Leftmost = | |
NoLeft | Leftmost of Start | |
interface Semiring<Leftmost> with | |
static member Zero = NoLeft | |
static member One = Leftmost NoStart | |
static member Add l r = | |
let leftmost l r = | |
match l, r with | |
| NoStart, NoStart -> NoStart | |
| NoStart, Start(a,b) -> Start(a,b) | |
| Start(a,b), NoStart -> Start(a,b) | |
| Start(la,lb), Start(ra,rb) -> Start(min la ra, max lb rb) | |
match l, r with | |
| NoLeft, r -> r | |
| l, NoLeft -> l | |
| Leftmost l, Leftmost r -> Leftmost (leftmost l r) | |
static member Mul l r = | |
let start l r = | |
match l, r with | |
| NoStart, r -> r | |
| Start(l,r), NoStart -> Start(l,r) | |
| Start(la,ra), Start(lb,rb) -> Start(min la ra, max lb rb) | |
match l, r with | |
| NoLeft, _ -> NoLeft | |
| _, NoLeft -> NoLeft | |
| Leftmost l, Leftmost r -> Leftmost (start l r) | |
end | |
let lsym<'c when 'c : equality> (c:'c) = | |
let weight (pos, x) = | |
match x with | |
| x when x = c -> Leftmost (Start (pos,pos)) | |
| _ -> NoLeft | |
sym weight | |
let a = lsym 'a' | |
let ab = rep (a |> alt <| lsym 'b') | |
let aaba = seq a (seq ab a) | |
let m1 : Leftmost = re_submatch aaba "ab" | |
let m2 : Leftmost = re_submatch aaba "aaaba" | |
let m3 : Leftmost = re_submatch aaba "bababa" | |
let m4 : Leftmost = re_submatch aaba "bababab" | |
let m5 : Leftmost = re_submatch aaba "babababa" | |
printf "%A\n" m1 | |
printf "%A\n" m2 | |
printf "%A\n" m3 | |
printf "%A\n" m4 | |
printf "%A\n" m5 |