Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Implementing find for a regex subset
-- Regex subset: https://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html
-- Find the left-most occurence of pattern that matches regex
import Data.List(tails)
type Regex = String
type Match = String
find :: Regex -> String -> Match
find r s = unwrap $ mapUntil "" (findWord r) (tails $ wrap s)
where wrap xs = ('^':xs) ++ "$"
unwrap xs = [c | c <- xs, c `notElem` ['^', '$']]
mapUntil :: Match -> (String -> Match -> Match) -> [String] -> Match
mapUntil d _ [] = d
mapUntil d f (x:xs) = if m /= d then m else mapUntil d f xs
where m = f x ""
findWord :: Regex -> String -> Match -> Match
findWord (x:'*':xs) s@(y:ys) ms = findWord xs ys' (reverse ms'++ ms)
where (ms', ys') = if not (match x y) then ("", s) else splitWhile (matchStar x xs) s
findWord (x:xs) (y:ys) ms = if match x y then findWord xs ys (y:ms) else ""
findWord "" _ ms = reverse ms
findWord _ _ ms = ""
match :: Char -> Char -> Bool
match x y = x == '.' || x == y
matchStar :: Char -> String -> Char -> Bool
matchStar x [] y = match x y
matchStar x (x':xs) y = (x == '.' && x' /= y) || x == y
splitWhile :: (a -> Bool) -> [a] -> ([a], [a])
splitWhile f xs = (takeWhile f xs, dropWhile f xs)
-- Tests
triples = [("", "", ""),
("a", "", ""),
("", "a", ""),
("a", "a", "a"),
("ab", "a", ""),
("ab", "ab", "ab"),
("ab", "abc", "ab"),
("a*", "", ""),
("a*", "aaaa", "aaaa"),
("a*", "aaaab", "aaaa"),
("a*b", "b", "b"),
("a*b", "aaaabc", "aaaab"),
("^ab*c*d", "abbbcccdefg", "abbbcccd"),
("^ab*c*d", "zabbbcccdefg", ""),
("ab*c*d$", "xyzad", "ad"),
("^ab*c*d", "xyzadz", ""),
(".*", "abcdefg", "abcdefg"),
("^abc.*z$", "abcsz", "abcsz"),
("^abc.*z.*", "abcsz", "abcsz"),
("^abc.*z.*", "abcsza", "abcsza"),
("^abc.*z.*z$", "abcsz", "")]
test xs = let ys = map (\(a, b, c) -> (find a b) == c) xs
in (and ys, zip ys xs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment