Skip to content

Instantly share code, notes, and snippets.

@rusimody
Created October 29, 2012 16:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rusimody/3974401 to your computer and use it in GitHub Desktop.
Save rusimody/3974401 to your computer and use it in GitHub Desktop.
Shortened lexer
-- This lexer is not completely faithful to the Haskell lexical syntax.
-- Current limitations:
-- Qualified names are not handled properly
-- Octal and hexidecimal numerics are not recognized as a single token
-- Comments are not treated properly
lex :: ReadS String
lex "" = [("","")]
lex (c:s)
| isSpace c = lex (dropWhile isSpace s)
lex ('\'':s) = [('\'':ch++"'", t) | (ch,'\'':t) <- lexLitChar s, ch /= "'" ]
lex ('"':s) = [('"':str, t) | (str,t) <- lexString s]
where
lexString ('"':s) = [("\"",s)]
lexString s = [(ch++str, u) | (ch,t) <- lexStrItem s, (str,u) <- lexString t]
lexStrItem ('\\':'&':s) = [("\\&",s)]
lexStrItem ('\\':c:s) | isSpace c = [("\\&",t) | '\\':t <- [dropWhile isSpace s]]
lexStrItem s = lexLitChar s
lex (c:s) | isSingle c = [([c],s)]
| isSym c = [(c:sym,t) | (sym,t) <- [span isSym s]]
| isAlpha c = [(c:nam,t) | (nam,t) <- [span isIdChar s]]
| isDigit c = [(c:ds++fe,t) | (ds,s) <- [span isDigit s], (fe,t) <- lexFracExp s]
| otherwise = [] -- bad character
where
isSingle c = c `elem` ",;()[]{}_`"
isSym c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
isIdChar c = isAlphaNum c || c `elem` "_'"
lexFracExp ('.':c:cs) | isDigit c = [('.':ds++e,u) | (ds,t) <- lexDigits (c:cs), (e,u) <- lexExp t]
lexFracExp s = lexExp s
lexExp (e:s) | e `elem` "eE" = [(e:c:ds,u) | (c:t) <- [s], c `elem` "+-", (ds,u) <- lexDigits t] ++
[(e:ds,t) | (ds,t) <- lexDigits s]
lexExp s = [("",s)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment