Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
The Original Lexer from the standard prelude
-- This lexer is not completely faithful to the Haskell lexical syntax.
-- Current limitations:
-- Qualified names are not handled properly
-- Octal and hexidecimal numerics are not recognized as a single token
-- Comments are not treated properly
lex :: ReadS String
lex "" = [("","")]
lex (c:s)
| isSpace c = lex (dropWhile isSpace s)
lex ('\'':s) = [('\'':ch++"'", t) | (ch,'\'':t) <- lexLitChar s,
ch /= "'" ]
lex ('"':s) = [('"':str, t) | (str,t) <- lexString s]
where
lexString ('"':s) = [("\"",s)]
lexString s = [(ch++str, u)
| (ch,t) <- lexStrItem s,
(str,u) <- lexString t ]
lexStrItem ('\\':'&':s) = [("\\&",s)]
lexStrItem ('\\':c:s) | isSpace c
= [("\\&",t) |
'\\':t <-
[dropWhile isSpace s]]
lexStrItem s = lexLitChar s
lex (c:s) | isSingle c = [([c],s)]
| isSym c = [(c:sym,t) | (sym,t) <- [span isSym s]]
| isAlpha c = [(c:nam,t) | (nam,t) <- [span isIdChar s]]
| isDigit c = [(c:ds++fe,t) | (ds,s) <- [span isDigit s],
(fe,t) <- lexFracExp s ]
| otherwise = [] -- bad character
where
isSingle c = c `elem` ",;()[]{}_`"
isSym c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
isIdChar c = isAlphaNum c || c `elem` "_'"
lexFracExp ('.':c:cs) | isDigit c
= [('.':ds++e,u) | (ds,t) <- lexDigits (c:cs),
(e,u) <- lexExp t]
lexFracExp s = lexExp s
lexExp (e:s) | e `elem` "eE"
= [(e:c:ds,u) | (c:t) <- [s], c `elem` "+-",
(ds,u) <- lexDigits t] ++
[(e:ds,t) | (ds,t) <- lexDigits s]
lexExp s = [("",s)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment