Created
August 10, 2014 04:04
-
-
Save athanclark/10073071da5be5f01920 to your computer and use it in GitHub Desktop.
Lex File
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- -*- haskell -*- | |
-- This Alex file was machine-generated by the BNF converter | |
{ | |
{-# OPTIONS -fno-warn-incomplete-patterns #-} | |
module LexBNF where | |
import qualified Data.Bits | |
import Data.Word (Word8) | |
} | |
$l = [a-zA-Z\192 - \255] # [\215 \247] -- isolatin1 letter FIXME | |
$c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter FIXME | |
$s = [a-z\222-\255] # [\247] -- small isolatin1 letter FIXME | |
$d = [0-9] -- digit | |
$i = [$l $d _ '] -- identifier character | |
$u = [\0-\255] -- universal: any character | |
@rsyms = -- symbols and non-identifier-like reserved words | |
\: \: \= | \| | |
:- | |
$white+ ; | |
@rsyms { tok (\p s -> PT p (eitherResIdent (TV . share) s)) } | |
\< ($l | $d | \_ | \-)* \> { tok (\p s -> PT p (eitherResIdent (T_NonTerminal . share) s)) } | |
$l $i* { tok (\p s -> PT p (eitherResIdent (TV . share) s)) } | |
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \"{ tok (\p s -> PT p (TL $ share $ unescapeInitTail s)) } | |
{ | |
tok f p s = f p s | |
share :: String -> String | |
share = id | |
data Tok = | |
TS !String !Int -- reserved words and symbols | |
| TL !String -- string literals | |
| TI !String -- integer literals | |
| TV !String -- identifiers | |
| TD !String -- double precision float literals | |
| TC !String -- character literals | |
| T_NonTerminal !String | |
deriving (Eq,Show,Ord) | |
data Token = | |
PT Posn Tok | |
| Err Posn | |
deriving (Eq,Show,Ord) | |
tokenPos (PT (Pn _ l _) _ :_) = "line " ++ show l | |
tokenPos (Err (Pn _ l _) :_) = "line " ++ show l | |
tokenPos _ = "end of file" | |
tokenPosn (PT p _) = p | |
tokenPosn (Err p) = p | |
tokenLineCol = posLineCol . tokenPosn | |
posLineCol (Pn _ l c) = (l,c) | |
mkPosToken t@(PT p _) = (posLineCol p, prToken t) | |
prToken t = case t of | |
PT _ (TS s _) -> s | |
PT _ (TL s) -> s | |
PT _ (TI s) -> s | |
PT _ (TV s) -> s | |
PT _ (TD s) -> s | |
PT _ (TC s) -> s | |
PT _ (T_NonTerminal s) -> s | |
data BTree = N | B String Tok BTree BTree deriving (Show) | |
eitherResIdent :: (String -> Tok) -> String -> Tok | |
eitherResIdent tv s = treeFind resWords | |
where | |
treeFind N = tv s | |
treeFind (B a t left right) | s < a = treeFind left | |
| s > a = treeFind right | |
| s == a = t | |
resWords = b "|" 2 (b "::=" 1 N N) N | |
where b s n = let bs = id s | |
in B bs (TS bs n) | |
unescapeInitTail :: String -> String | |
unescapeInitTail = id . unesc . tail . id where | |
unesc s = case s of | |
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs | |
'\\':'n':cs -> '\n' : unesc cs | |
'\\':'t':cs -> '\t' : unesc cs | |
'"':[] -> [] | |
c:cs -> c : unesc cs | |
_ -> [] | |
------------------------------------------------------------------- | |
-- Alex wrapper code. | |
-- A modified "posn" wrapper. | |
------------------------------------------------------------------- | |
data Posn = Pn !Int !Int !Int | |
deriving (Eq, Show,Ord) | |
alexStartPos :: Posn | |
alexStartPos = Pn 0 1 1 | |
alexMove :: Posn -> Char -> Posn | |
alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1) | |
alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1 | |
alexMove (Pn a l c) _ = Pn (a+1) l (c+1) | |
type Byte = Word8 | |
type AlexInput = (Posn, -- current position, | |
Char, -- previous char | |
[Byte], -- pending bytes on the current char | |
String) -- current input string | |
tokens :: String -> [Token] | |
tokens str = go (alexStartPos, '\n', [], str) | |
where | |
go :: AlexInput -> [Token] | |
go inp@(pos, _, _, str) = | |
case alexScan inp 0 of | |
AlexEOF -> [] | |
AlexError (pos, _, _, _) -> [Err pos] | |
AlexSkip inp' len -> go inp' | |
AlexToken inp' len act -> act pos (take len str) : (go inp') | |
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) | |
alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s)) | |
alexGetByte (p, _, [], s) = | |
case s of | |
[] -> Nothing | |
(c:s) -> | |
let p' = alexMove p c | |
(b:bs) = utf8Encode c | |
in p' `seq` Just (b, (p', c, bs, s)) | |
alexInputPrevChar :: AlexInput -> Char | |
alexInputPrevChar (p, c, bs, s) = c | |
-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. | |
utf8Encode :: Char -> [Word8] | |
utf8Encode = map fromIntegral . go . ord | |
where | |
go oc | |
| oc <= 0x7f = [oc] | |
| oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6) | |
, 0x80 + oc Data.Bits..&. 0x3f | |
] | |
| oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12) | |
, 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) | |
, 0x80 + oc Data.Bits..&. 0x3f | |
] | |
| otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18) | |
, 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) | |
, 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) | |
, 0x80 + oc Data.Bits..&. 0x3f | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment