Skip to content

Instantly share code, notes, and snippets.

@abailly
Created November 10, 2020 15:21
Show Gist options
  • Save abailly/071ecb8264cfda932361c2f09ccdaa8a to your computer and use it in GitHub Desktop.
Save abailly/071ecb8264cfda932361c2f09ccdaa8a to your computer and use it in GitHub Desktop.
||| A proof of the validity of matching algorithm for
||| regular expressions
|||
||| From https://arxiv.org/abs/2003.06458
module re
import Decidable.Equality
%default total
infix 7 .|
infix 6 ...
||| Representation of Regular expressions
data R : Type where
R_0 : R
R_1 : R
R_char : Char -> R
(.|) : R -> R -> R
(...) : R -> R -> R
R_star : R -> R
||| Matching algorithms
data R_match : List Char -> R -> Type where
R_match_zero : R_match s R_0
R_match_unit : R_match [] R_1
R_match_char : {c : Char} -> R_match (c :: _) (R_char c)
R_match_plus1 : {s : List Char} -> {r1, r2 : R} -> R_match s r1 -> R_match s (r1 .| r2)
R_match_plus2 : {s : List Char} -> {r1, r2 : R} -> R_match s r2 -> R_match s (r1 .| r2)
R_match_times : {s1, s2 : List Char} -> {r1, r2 : R} ->
R_match s1 r1 -> R_match s2 r2 -> R_match (s1 ++ s2) (r1 ... r2)
R_match_star1 : {r : R} -> R_match [] (R_star r)
R_match_star2 : {s1, s2 : List Char} -> {r : R} ->
R_match s1 r ->
R_match s2 (R_star r) -> R_match (s1 ++ s2) (R_star r)
data R_match_partial : (whole : List Char) -> (r : R) -> Type where
R_match_here : (matched, rest : List Char) -> (r_match : R_match matched r) -> R_match_partial (matched ++ rest) r
||| A proof that the list `ls` can be split in 2 parts
data Split : (ls : List a) -> Type where
IsSplit : (pref : List a) -> (suff : List a) -> Split (pref ++ suff)
prepend : (x : a) -> Split xs -> Split (x :: xs)
prepend x (IsSplit pref suff) = IsSplit (x :: pref) suff
splits : (ls : List a) -> List (Split ls)
splits ls = IsSplit [] ls :: splits' ls
where
splits' : (ls : List a) -> List (Split ls)
splits' [] = []
splits' (x :: xs) =
let subsplits = splits' xs
in IsSplit [x] xs :: map (prepend x) subsplits
mutual
check_partial : (re : R) -> (Either String (R_match_partial s re), List Char) -> Split s -> (Either String (R_match_partial s re), List Char)
check_partial _ r@(Right _, _) _ = r
check_partial re (Left err, _) (IsSplit pref suff) with (accept pref re)
check_partial re (Left err, _) (IsSplit pref suff) | (Left l) = (Left l, suff)
check_partial re (Left err, _) (IsSplit pref suff) | (Right x) = (Right (R_match_here pref suff x), suff)
accept_partial : (r : R) -> (s : List Char)
-> Either String (R_match_partial s r)
accept_partial r s =
let subs = splits s
in fst $ foldl (check_partial r) (Left "no match", the (List Char) []) subs
accept : (s : List Char) -> (r : R) -> Either String (R_match s r)
accept s R_0 = Right R_match_zero
accept [] R_1 = Right R_match_unit
accept s R_1 = Left $ "'" ++ pack s ++ "' is not empty"
accept [y] (R_char x) with (decEq x y)
accept [y] (R_char x) | (Yes prf) = rewrite prf in Right R_match_char
accept [y] (R_char x) | (No contra) = Left $ show y ++ " does not match " ++ show x
accept [] (R_char x) = Left $ "empty string cannot match char " ++ show x
accept s (x .| y) with (accept s x)
accept s (x .| y) | (Left l) with (accept s y)
accept s (x .| y) | (Left l) | (Left z) = Left l
accept s (x .| y) | (Left l) | (Right r) = Right (R_match_plus2 r)
accept s (x .| y) | (Right r) = Right (R_match_plus1 r)
accept s (x ... y) with (accept_partial x s)
accept s (x ... y) | (Left l) = Left l
accept (matched ++ rest) (x ... y) | (Right (R_match_here matched rest r_match)) with (accept rest y)
accept (matched ++ rest) (x ... y) | (Right (R_match_here matched rest r_match)) | (Left l) = Left l
accept (matched ++ rest) (x ... y) | (Right (R_match_here matched rest r_match)) | (Right r) = Right $ R_match_times r_match r
-- empty string matches <re>*
accept [] (R_star x) = Right $ R_match_star1
-- non empty string matches <re>* if a prefix matches <re>
-- and the rest matches <re>*
accept s (R_star x) with (accept_partial x s)
accept s (R_star x) | (Left l) = Left l
accept (matched ++ []) (R_star x) | (Right (R_match_here matched [] r_match)) = Right $ R_match_star2 r_match R_match_star1
accept (matched ++ rest) (R_star x) | (Right (R_match_here matched rest r_match)) with (assert_total $ accept rest (R_star x))
accept (matched ++ rest) (R_star x) | (Right (R_match_here matched rest r_match)) | (Left l) = (Left l)
accept (matched ++ rest) (R_star x) | (Right (R_match_here matched rest r_match)) | (Right r) = Right $ R_match_star2 r_match r
accept s _ = Left $ "failed to match " ++ pack s
namespace Test
test_splits : splits [1,2] = [IsSplit [] [1,2], IsSplit [1] [2], IsSplit [1,2] []]
test_splits = Refl
test_empty : accept [] R_1 = Right R_match_unit
test_empty = Refl
test_seq_match_1 : accept ['a','b'] (R_char 'a' ... R_char 'b') = Right (R_match_times {s1=['a']} {s2=['b']} R_match_char R_match_char)
test_seq_match_1 = Refl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment