-
-
Save darius/d59e1552a394f08688d566dc23c664a0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import qualified Data.Array as A | |
import Data.List (nub) | |
import Data.Monoid ((<>)) | |
spec = do | |
print (many (lit 'a') `shouldMatch` "aaa") | |
print (many (lit 'a' `alt` lit 'a') `shouldNotMatch` "aaacf") | |
print (many (lit 'a' `alt` lit 'b') `shouldMatch` "abababa") | |
print ((lit 'a' . many (lit 'b' `alt` lit 'c') . lit 'd') `shouldMatch` "abcbd") | |
shouldMatch re string = (re `matches` string) == True | |
shouldNotMatch re string = (re `matches` string) == False | |
type Regex = NFA -> NFA | |
type NFA = ([Int], [State]) -- (indices of start-states, states) | |
type State = (Char, [Int]) -- Meaning: if the current character matches, then move to all the following states | |
type StateArray = A.Array Int State | |
accept = -1 -- A dummy index meaning the accepting state. | |
matches :: Regex -> [Char] -> Bool | |
matches re cs = | |
let (starts, states) = re ([accept], []) | |
stateArray = A.listArray (0, length states - 1) states | |
ends = foldl step starts cs | |
step :: [Int] -> Char -> [Int] | |
step starts' c = nub $ concat $ map (after c stateArray) starts' | |
in any (accept ==) ends | |
after :: Char -> StateArray -> Int -> [Int] | |
after c states i = if i == accept then [] | |
else let (c', xs) = states A.! i in | |
if c' == c then xs else [] | |
lit :: Char -> Regex -- Make a literal-character regex | |
lit c (starts, states) = ([length states], states <> [(c, starts)]) | |
alt :: Regex -> Regex -> Regex -- Make an re1|re2 regex | |
alt re1 re2 nfa@(starts, _) = | |
let (starts1, states1) = re1 nfa | |
(starts2, states2) = re2 (starts, states1) in | |
(starts1 <> starts2, states2) | |
-- Definitely magic. N.B. this loops forever on many (many (lit 'a')), | |
-- but so did Thompson's version of this algorithm. | |
many :: Regex -> Regex -- Make an re* regex | |
many re (starts, states) = | |
let (loopStarts, loopStates) = re (resultStarts, states) | |
resultStarts = starts <> loopStarts in | |
(resultStarts, loopStates) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment