Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Implementing find for a regex subset
-- Regex subset: https://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html
-- Find the left-most occurence of pattern that matches regex
import Data.List(tails)
type Regex = String
type Match = String
find :: Regex -> String -> Match
find r s = unwrap $ mapUntil "" (findWord r) (tails $ wrap s)
where wrap xs = ('^':xs) ++ "$"
unwrap xs = [c | c <- xs, c `notElem` ['^', '$']]
mapUntil :: Match -> (String -> Match -> Match) -> [String] -> Match
mapUntil d _ [] = d
mapUntil d f (x:xs) = if m /= d then m else mapUntil d f xs
where m = f x ""
findWord :: Regex -> String -> Match -> Match
findWord (x:'*':xs) s@(y:ys) ms = findWord xs ys' (reverse ms'++ ms)
where (ms', ys') = if not (match x y) then ("", s) else splitWhile (matchStar x xs) s
findWord (x:xs) (y:ys) ms = if match x y then findWord xs ys (y:ms) else ""
findWord "" _ ms = reverse ms
findWord _ _ ms = ""
match :: Char -> Char -> Bool
match x y = x == '.' || x == y
matchStar :: Char -> String -> Char -> Bool
matchStar x [] y = match x y
matchStar x (x':xs) y = (x == '.' && x' /= y) || x == y
splitWhile :: (a -> Bool) -> [a] -> ([a], [a])
splitWhile f xs = (takeWhile f xs, dropWhile f xs)
-- Tests
triples = [("", "", ""),
("a", "", ""),
("", "a", ""),
("a", "a", "a"),
("ab", "a", ""),
("ab", "ab", "ab"),
("ab", "abc", "ab"),
("a*", "", ""),
("a*", "aaaa", "aaaa"),
("a*", "aaaab", "aaaa"),
("a*b", "b", "b"),
("a*b", "aaaabc", "aaaab"),
("^ab*c*d", "abbbcccdefg", "abbbcccd"),
("^ab*c*d", "zabbbcccdefg", ""),
("ab*c*d$", "xyzad", "ad"),
("^ab*c*d", "xyzadz", ""),
(".*", "abcdefg", "abcdefg"),
("^abc.*z$", "abcsz", "abcsz"),
("^abc.*z.*", "abcsz", "abcsz"),
("^abc.*z.*", "abcsza", "abcsza"),
("^abc.*z.*z$", "abcsz", "")]
test xs = let ys = map (\(a, b, c) -> (find a b) == c) xs
in (and ys, zip ys xs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.