Skip to content

Instantly share code, notes, and snippets.

@adamsmasher
Created April 7, 2013 22:48
Show Gist options
  • Save adamsmasher/5332909 to your computer and use it in GitHub Desktop.
Save adamsmasher/5332909 to your computer and use it in GitHub Desktop.
Haskell Regex (NFA)
{-# LANGUAGE DoRec #-}
import Control.Monad.State
import Data.Set (Set)
import qualified Data.Set as Set
data Regex =
Literal Char
| Concat Regex Regex
| Altern Regex Regex
| Star Regex
deriving Show
data MatchState = NonMatch | Match deriving (Show, Eq, Ord)
type Transition = (String, NFAState)
type StepResult = Either (Set Transition) MatchState
type StateID = Int
-- an NFAState takes a String, and moves one step based on the first character
-- then returns a set of the next things to do, or a match if we're done
data NFAState = NFAState { stateId :: StateID, runNFA :: String -> StepResult }
instance Eq NFAState where
a == b = (stateId a) == (stateId b)
instance Ord NFAState where
a <= b = (stateId a) <= (stateId b)
instance Show NFAState where
show = show . stateId
makeNFA :: Regex -> NFAState -> NFAState
makeNFA r k = evalState (makeNFA' r k) 0
addIdToNFA :: (String -> StepResult) -> State Int NFAState
addIdToNFA f = do
curId <- get
put $ curId + 1
return $ NFAState curId f
makeNFA' :: Regex -> NFAState -> State Int NFAState
makeNFA' (Literal c) k = addIdToNFA $ \str ->
case str of
(c':cs) -> if c == c' then Left (Set.singleton (cs, k)) else Right NonMatch
[] -> Right NonMatch
makeNFA' (Concat r1 r2) k = makeNFA' r1 =<< makeNFA' r2 k
makeNFA' (Altern r1 r2) k = do
left <- makeNFA' r1 k
right <- makeNFA' r2 k
addIdToNFA $ \str ->
Left $ (Set.singleton (str, left)) `Set.union` (Set.singleton (str, right))
makeNFA' (Star r) k = do
rec self <- addIdToNFA $ \str -> Left $ Set.union
(Set.singleton (str, k))
(Set.singleton (str, more))
more <- makeNFA' r self
return self
checkDone :: NFAState
checkDone = NFAState (-1) $ \str -> Right $ case str of
"" -> Match
_ -> NonMatch
match :: Regex -> String -> Bool
match r = matchNFA $ makeNFA r checkDone
matchNFA :: NFAState -> String -> Bool
matchNFA start str = matchLoop $ Set.singleton (str, start)
matchLoop :: Set Transition -> Bool
matchLoop states = case step states of
Left stepResult -> if Set.null stepResult then False else matchLoop stepResult
Right Match -> True
Right NonMatch -> False
step :: Set Transition -> StepResult
step states = foldResults $ Set.map (uncurry $ flip runNFA) states
foldResults :: Set StepResult -> StepResult
foldResults = Set.fold joinResults (Right NonMatch)
joinResults :: StepResult -> StepResult -> StepResult
joinResults _ (Right Match) = Right Match
joinResults (Right Match) _ = Right Match
joinResults l (Right NonMatch) = l
joinResults (Right NonMatch) r = r
joinResults (Left l) (Left r) = Left $ l `Set.union` r
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment