Skip to content

Instantly share code, notes, and snippets.

@jozefg
Created May 7, 2015 03:38
Show Gist options
  • Save jozefg/01a1360512d8f34052d1 to your computer and use it in GitHub Desktop.
Save jozefg/01a1360512d8f34052d1 to your computer and use it in GitHub Desktop.
Regex matcher
module regex where
open import Data.Char using (Char; _≟_)
open import Data.List using (List ; _∷_ ; [])
open import Relation.Binary.PropositionalEquality
open import Relation.Nullary
open import Data.Empty using (⊥)
-- Reason for this is because it simplifies pattern matching.
String : Set
String = List Char
data Regex : Set where
both : Regex → Regex → Regex
then : Regex → Regex → Regex
or : Regex → Regex → Regex
char : Char → Regex
star : Regex → Regex
empty : Regex
data Split : String → String → String → Set where
nil : (s : String) → Split s [] s
cons : (c : Char)(s : String){s1 s2 : String}
→ Split s s1 s2
→ Split (c ∷ s) (c ∷ s1) s2
data _∈_ : String → Regex → Set where
char : (c : Char) → (c ∷ []) ∈ char c
or1 : (s : String)(r1 r2 : Regex)
→ s ∈ r1
→ s ∈ or r1 r2
or2 : (s : String)(r1 r2 : Regex)
→ s ∈ r2
→ s ∈ or r1 r2
both : (s : String)(r1 r2 : Regex)
→ s ∈ r1 → s ∈ r2
→ s ∈ both r1 r2
then : (r₁ r₂ : Regex)(s s1 s2 : String)
→ Split s s1 s2
→ s1 ∈ r₁
→ s2 ∈ r₂
→ s ∈ then r₁ r₂
starz : (r : Regex) → [] ∈ star r
stars : (r : Regex)(s s1 s2 : String)
→ Split s s1 s2
→ s1 ∈ r
→ s2 ∈ star r
→ s ∈ star r
π₁ : {r r₂ : Regex}{s : String} → s ∈ both r r₂ → s ∈ r
π₁ (both s r1 r2 i i₁) = i
π₂ : {r r₂ : Regex}{s : String} → s ∈ both r r₂ → s ∈ r₂
π₂ (both s r1 r2 i i₁) = i₁
either : {r r₂ : Regex}{s : String}
→ (s ∈ r → ⊥) → (s ∈ r₂ → ⊥)
→ s ∈ or r r₂ → ⊥
either l r₁ (or1 s r r₂ p) = l p
either l r₁ (or2 s r r₂ p) = r₁ p
¬-char : {c c₁ : Char}{s : String} → c ≢ c₁ → (c ∷ s) ∈ char c₁ → ⊥
¬-char neq (char c) = neq refl
data HasSplit (s : String)(P : (s₁ s₂ : String) → Split s s₁ s₂ → Set) : Set where
exists : (s₁ s₂ : String)(sp : Split s s₁ s₂)
→ P s₁ s₂ sp
→ HasSplit s P
ShiftOver : (s : String)(c : Char)
(P : (s₁ s₂ : String) → Split (c ∷ s) s₁ s₂ → Set)
→ (s₁ s₂ : String) → Split s s₁ s₂ → Set
ShiftOver s c P s₁ s₂ sp = P (c ∷ s₁) s₂ (cons c s sp)
shiftOverDec : {s : String}(c : Char)
{P : (s₁ s₂ : String) → Split (c ∷ s) s₁ s₂ → Set}
→ ((s₁ s₂ : String)(sp : Split (c ∷ s) s₁ s₂) → Dec (P s₁ s₂ sp))
→ ((s₁ s₂ : String)(sp : Split s s₁ s₂)
→ Dec (ShiftOver s c P s₁ s₂ sp))
shiftOverDec c dec s₁ s₂ sp = dec (c ∷ s₁) s₂ (cons c _ sp)
decHasSplit : (s : String){P : (s₁ s₂ : String) → Split s s₁ s₂ → Set}
→ ((s₁ s₂ : String)(sp : Split s s₁ s₂) → Dec (P s₁ s₂ sp))
→ Dec (HasSplit s P)
decHasSplit [] decP with decP [] [] (nil [])
decHasSplit [] decP | yes p = yes (exists [] [] (nil []) p)
decHasSplit [] decP | no ¬p = no contra
where contra : HasSplit [] _ → ⊥
contra (exists .[] .[] (nil .[]) x) = ¬p x
decHasSplit (x ∷ s) decP with decHasSplit s (shiftOverDec x decP) | decP _ _ (nil (x ∷ s))
decHasSplit (x ∷ s) decP | yes p | yes p₁ = yes (exists [] (x ∷ s) (nil (x ∷ s)) p₁)
decHasSplit (x ∷ s) decP | yes (exists s₁ s₂ sp x₁) | no ¬p = yes (exists (x ∷ s₁) s₂ (cons x s sp) x₁)
decHasSplit (x ∷ s) decP | no ¬p | yes p = yes (exists [] (x ∷ s) (nil (x ∷ s)) p)
decHasSplit (x ∷ s) decP | no ¬p | no ¬p₁ = no contra
where contra : HasSplit (x ∷ s) _ → ⊥
contra (exists .[] .(x ∷ s) (nil ._) x₁) = ¬p₁ x₁
contra (exists (.x ∷ s₁) s₂ (cons .x .s sp) x₁) = ¬p (exists s₁ s₂ sp x₁)
data StarR (r : Regex) (s₁ s₂ : String) : Set where
rstar : s₁ ∈ r → s₂ ∈ star r → StarR r s₁ s₂
rπ₁ : {r : Regex}{s₁ s₂ : String} → StarR r s₁ s₂ → s₁ ∈ r
rπ₁ (rstar p _) = p
rπ₂ : {r : Regex}{s₁ s₂ : String} → StarR r s₁ s₂ → s₂ ∈ star r
rπ₂ (rstar _ p) = p
data ThenR (r₁ r₂ : Regex) (s₁ s₂ : String) : Set where
rthen : s₁ ∈ r₁ → s₂ ∈ r₂ → ThenR r₁ r₂ s₁ s₂
trπ₁ : {r₁ r₂ : Regex}{s₁ s₂ : String} → ThenR r₁ r₂ s₁ s₂ → s₁ ∈ r₁
trπ₁ (rthen p _) = p
trπ₂ : {r₁ r₂ : Regex}{s₁ s₂ : String} → ThenR r₁ r₂ s₁ s₂ → s₂ ∈ r₂
trπ₂ (rthen _ p) = p
match : (s : String)(r : Regex) → Dec (s ∈ r)
{-# NO_TERMINATION_CHECK #-}
decStar : (r : Regex)(s s₁ s₂ : String)(sp : Split s s₁ s₂)
→ Dec (StarR r s₁ s₂)
decThen : (r₁ r₂ : Regex)(s s₁ s₂ : String)(sp : Split s s₁ s₂)
→ Dec (ThenR r₁ r₂ s₁ s₂)
decStar r s s₁ s₂ sp with match s₁ r | match s₂ (star r)
decStar r s s₁ s₂ sp | yes p | yes p₁ = yes (rstar p p₁)
decStar r s s₁ s₂ sp | yes p | no ¬p = no (λ p → ¬p (rπ₂ p))
decStar r s s₁ s₂ sp | no ¬p | p₂ = no (λ p → ¬p (rπ₁ p))
decThen r₁ r₂ s s₁ s₂ sp with match s₁ r₁ | match s₂ r₂
decThen r₁ r₂ s s₁ s₂ sp | yes p | yes p₁ = yes (rthen p p₁)
decThen r₁ r₂ s s₁ s₂ sp | yes p | no ¬p = no (λ p → ¬p (trπ₂ p))
decThen r₁ r₂ s s₁ s₂ sp | no ¬p | yes p = no (λ p → ¬p (trπ₁ p))
decThen r₁ r₂ s s₁ s₂ sp | no ¬p | no ¬p₁ = no (λ p → ¬p (trπ₁ p))
match s (both r r₁) with match s r | match s r₁
match s (both r r₁) | yes p | yes p₁ = yes (both s r r₁ p p₁)
match s (both r r₁) | yes p | no ¬p = no (λ p → ¬p (π₂ p))
match s (both r r₁) | no ¬p | yes p = no (λ p → ¬p (π₁ p))
match s (both r r₁) | no ¬p | no ¬p₁ = no (λ p → ¬p (π₁ p))
match s (or r r₁) with match s r | match s r₁
match s (or r r₁) | yes p | yes p₁ = yes (or1 s r r₁ p)
match s (or r r₁) | yes p | no ¬p = yes (or1 s r r₁ p)
match s (or r r₁) | no ¬p | yes p = yes (or2 s r r₁ p)
match s (or r r₁) | no ¬p | no ¬p₁ = no (λ p → either ¬p ¬p₁ p)
match [] (char x) = no (λ ())
match (x ∷ s) (char x₁) with x ≟ x₁
match (x ∷ []) (char .x) | yes refl = yes (char x)
match (x ∷ x₁ ∷ s) (char .x) | yes refl = no (λ ())
match (x ∷ s) (char x₁) | no ¬ = no (λ p → ¬-char ¬ p)
match s empty = no (λ ())
match [] (star r) = yes (starz r)
match (x ∷ s) (star r) with decHasSplit (x ∷ s) (decStar r (x ∷ s))
match (x ∷ s) (star r) | yes (exists s₁ s₂ sp (rstar x₁ x₂)) = yes (stars r (x ∷ s) s₁ s₂ sp x₁ x₂)
match (x ∷ s) (star r) | no ¬p = no contra
where contra : (x ∷ s) ∈ star r → ⊥
contra (stars .r .(x ∷ s) s1 s2 x₁ p p₁) = ¬p (exists s1 s2 x₁ (rstar p p₁))
match s (then r₁ r₂) with decHasSplit s (decThen r₁ r₂ s)
match s (then r₁ r₂) | yes (exists s₁ s₂ sp (rthen x x₁)) = yes (then r₁ r₂ s s₁ s₂ sp x x₁)
match s (then r₁ r₂) | no ¬p = no contra
where contra : s ∈ then r₁ r₂ → ⊥
contra (then .r₁ .r₂ .s s1 s2 x p p₁) = ¬p (exists s1 s2 x (rthen p p₁))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment