Created
January 30, 2014 22:55
-
-
Save almost/8721882 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | Every letter a compiled regex can match. Only the capitals @\'A\'@ to
-- @\'Z\'@ matter for this problem, so wildcards and negated classes are
-- resolved against this set.
allLetters :: Set.Set Char
allLetters = Set.fromList ['A' .. 'Z']
-- | Compile a whole regex into an instruction list.
--
-- The pattern is consumed one atom at a time with 'compilePart'; the
-- instructions of each atom are emitted in order and a single 'End'
-- instruction is appended once the pattern is exhausted.
compile :: String -> [Instruction Char]
compile = go
  where
    -- Nothing left to compile: close the program.
    go [] = [End]
    -- Emit the next atom, then carry on with whatever it did not consume.
    go remaining = emitted ++ go leftover
      where
        (emitted, leftover) = compilePart remaining
-- | Compile the single regex atom at the head of the pattern, together with
-- the quantifier that directly follows it (handled by 'compileTail').
--
-- Returns the instructions for that atom and the part of the pattern that
-- was not consumed. Atoms are recognised in this order:
--
--   * an opening parenthesis starts a group, compiled by 'compileBrackets';
--   * @[^...]@ matches every member of 'allLetters' except the listed ones;
--   * @[...]@ matches any one of the listed characters;
--   * @.@ matches any member of 'allLetters';
--   * a backslash followed by any character is a backreference, currently
--     approximated by @.*@;
--   * any other character is matched literally.
--
-- An empty pattern yields no instructions and an empty remainder.
compilePart :: String -> ([Instruction Char], String)
compilePart ('(' : xs) = compileTail instr rest
  where
    (instr, rest) = compileBrackets xs
compilePart ('[' : '^' : xs) =
  compileTail [Match (Set.difference allLetters (Set.fromList cls))] rest
  where
    (cls, rest) = splitClass xs
compilePart ('[' : xs) = compileTail [Match (Set.fromList cls)] rest
  where
    (cls, rest) = splitClass xs
compilePart ('.' : xs) = compileTail [Match allLetters] xs
-- NOTE: Turning backrefs into .*s at the moment!!!!!
-- The rewritten pattern is ".*" followed by the rest, so a quantifier written
-- directly after the backreference is not applied to it: the "*" is the one
-- consumed by 'compileTail', and the original quantifier is later compiled
-- as a literal character.
compilePart ('\\' : _ref : xs) = compilePart ('.' : '*' : xs)
compilePart (x : xs) = compileTail [Match (Set.fromList [x])] xs
compilePart [] = ([], [])

-- | Split the text following a @[@ (or @[^@) into the class members and the
-- pattern text after the closing @]@.
--
-- Total by construction: when the closing @]@ is missing, the class runs to
-- the end of the pattern and the remainder is empty, instead of failing on a
-- partial pattern match.
splitClass :: String -> (String, String)
splitClass xs = (members, drop 1 afterMembers)
  where
    (members, afterMembers) = break (== ']') xs
-- | Compile the contents of a group, starting just after its opening
-- parenthesis.
--
-- Atoms are compiled one after another with 'compilePart' until a closing
-- parenthesis is met. The result is the concatenated instructions of those
-- atoms and the pattern text that follows the closing parenthesis.
--
-- A group that is never closed is treated as ending at the end of the
-- pattern. Without that clause the recursion would not terminate, because
-- 'compilePart' hands back an empty pattern unchanged.
compileBrackets :: String -> ([Instruction Char], String)
compileBrackets [] = ([], [])
compileBrackets (')' : xs) = ([], xs)
compileBrackets xs = (instr ++ instr', rest')
  where
    (instr, rest) = compilePart xs
    (instr', rest') = compileBrackets rest
-- | Apply an optional postfix quantifier to already-compiled instructions.
--
-- Takes the instructions of one atom and the pattern text that follows it.
-- If that text starts with @*@, @?@ or @+@ the quantifier is consumed and
-- the instructions are wrapped in 'Split's; otherwise both arguments are
-- returned unchanged. With @n@ the number of instructions in the atom:
--
--   * @*@: @Split 1 (2 + n)@, the atom, then @Split 1 (-n)@;
--   * @?@: @Split 1 (1 + n)@, then the atom;
--   * @+@: the atom, then @Split 1 (-n)@.
--
-- NOTE(review): the offsets look relative to the position of the 'Split'
-- itself (the negative one can only point backwards) - confirm against the
-- interpreter that runs these instructions.
compileTail :: [Instruction Char] -> String -> ([Instruction Char], String)
compileTail body input =
  case input of
    '*' : rest -> (Split 1 (2 + n) : body ++ [loopBack], rest)
    '?' : rest -> (Split 1 (1 + n) : body, rest)
    '+' : rest -> (body ++ [loopBack], rest)
    _ -> (body, input)
  where
    -- Number of instructions the quantifier has to jump over.
    n = length body
    -- Placed after the atom: either continue or go back for another round.
    loopBack = Split 1 (negate n)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment