Skip to content

Instantly share code, notes, and snippets.

@darius
Forked from emhoracek/nfa.hs
Last active May 1, 2016 03:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save darius/d59e1552a394f08688d566dc23c664a0 to your computer and use it in GitHub Desktop.
Save darius/d59e1552a394f08688d566dc23c664a0 to your computer and use it in GitHub Desktop.
import qualified Data.Array as A
import Data.List (nub)
import Data.Monoid ((<>))
spec = do
print (many (lit 'a') `shouldMatch` "aaa")
print (many (lit 'a' `alt` lit 'a') `shouldNotMatch` "aaacf")
print (many (lit 'a' `alt` lit 'b') `shouldMatch` "abababa")
print ((lit 'a' . many (lit 'b' `alt` lit 'c') . lit 'd') `shouldMatch` "abcbd")
shouldMatch re string = (re `matches` string) == True
shouldNotMatch re string = (re `matches` string) == False
type Regex = NFA -> NFA
type NFA = ([Int], [State]) -- (indices of start-states, states)
type State = (Char, [Int]) -- Meaning: if the current character matches, then move to all the following states
type StateArray = A.Array Int State
accept = -1 -- A dummy index meaning the accepting state.
matches :: Regex -> [Char] -> Bool
matches re cs =
let (starts, states) = re ([accept], [])
stateArray = A.listArray (0, length states - 1) states
ends = foldl step starts cs
step :: [Int] -> Char -> [Int]
step starts' c = nub $ concat $ map (after c stateArray) starts'
in any (accept ==) ends
after :: Char -> StateArray -> Int -> [Int]
after c states i = if i == accept then []
else let (c', xs) = states A.! i in
if c' == c then xs else []
lit :: Char -> Regex -- Make a literal-character regex
lit c (starts, states) = ([length states], states <> [(c, starts)])
alt :: Regex -> Regex -> Regex -- Make an re1|re2 regex
alt re1 re2 nfa@(starts, _) =
let (starts1, states1) = re1 nfa
(starts2, states2) = re2 (starts, states1) in
(starts1 <> starts2, states2)
-- Definitely magic. N.B. this loops forever on many (many (lit 'a')),
-- but so did Thompson's version of this algorithm.
many :: Regex -> Regex -- Make an re* regex
many re (starts, states) =
let (loopStarts, loopStates) = re (resultStarts, states)
resultStarts = starts <> loopStarts in
(resultStarts, loopStates)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment