Skip to content

Instantly share code, notes, and snippets.

@heitor-lassarote
Last active September 22, 2023 15:46
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save heitor-lassarote/e56e5330368f2cf9265a6eefa8a7a880 to your computer and use it in GitHub Desktop.
Save heitor-lassarote/e56e5330368f2cf9265a6eefa8a7a880 to your computer and use it in GitHub Desktop.
Lexer.x: Strings
{
module Lexer
( -- * Invoking Alex
Alex
, AlexPosn (..)
, alexGetInput
, alexError
, runAlex
, alexMonadScan
, Range (..)
, RangedToken (..)
, Token (..)
, scanMany
) where
import Control.Monad (when)
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS
}
%wrapper "monadUserState-bytestring"
$digit = [0-9]
$alpha = [a-zA-Z]
@id = ($alpha | \_) ($alpha | $digit | \_ | \' | \?)*
tokens :-
<0> $white+ ;
<0> "(*" { nestComment `andBegin` comment }
<0> "*)" { \_ _ -> alexError "Error: unexpected closing comment" }
<comment> "(*" { nestComment }
<comment> "*)" { unnestComment }
<comment> . ;
<comment> \n ;
-- Keywords
<0> let { tok Let }
<0> in { tok In }
<0> if { tok If }
<0> then { tok Then }
<0> else { tok Else }
-- Arithmetic operators
<0> "+" { tok Plus }
<0> "-" { tok Minus }
<0> "*" { tok Times }
<0> "/" { tok Divide }
-- Comparison operators
<0> "=" { tok Eq }
<0> "<>" { tok Neq }
<0> "<" { tok Lt }
<0> "<=" { tok Le }
<0> ">" { tok Gt }
<0> ">=" { tok Ge }
-- Logical operators
<0> "&" { tok And }
<0> "|" { tok Or }
-- Parenthesis
<0> "(" { tok LPar }
<0> ")" { tok RPar }
-- Lists
<0> "[" { tok LBrack }
<0> "]" { tok RBrack }
<0> "," { tok Comma }
-- Types
<0> ":" { tok Colon }
<0> "->" { tok Arrow }
-- Identifiers
<0> @id { tokId }
-- Constants
<0> $digit+ { tokInteger }
<0> \"[^\"]*\" { tokString }
{
data AlexUserState = AlexUserState
{ nestLevel :: Int
}
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState
{ nestLevel = 0
}
get :: Alex AlexUserState
get = Alex $ \s -> Right (s, alex_ust s)
put :: AlexUserState -> Alex ()
put s' = Alex $ \s -> Right (s{alex_ust = s'}, ())
modify :: (AlexUserState -> AlexUserState) -> Alex ()
modify f = Alex $ \s -> Right (s{alex_ust = f (alex_ust s)}, ())
alexEOF :: Alex RangedToken
alexEOF = do
(pos, _, _, _) <- alexGetInput
startCode <- alexGetStartCode
when (startCode == comment) $
alexError "Error: unclosed comment"
pure $ RangedToken EOF (Range pos pos)
data Range = Range
{ start :: AlexPosn
, stop :: AlexPosn
} deriving (Eq, Show)
data RangedToken = RangedToken
{ rtToken :: Token
, rtRange :: Range
} deriving (Eq, Show)
data Token
-- Identifiers
= Identifier ByteString
-- Constants
| String ByteString
| Integer Integer
-- Keywords
| Let
| In
| If
| Then
| Else
-- Arithmetic operators
| Plus
| Minus
| Times
| Divide
-- Comparison operators
| Eq
| Neq
| Lt
| Le
| Gt
| Ge
-- Logical operators
| And
| Or
-- Parenthesis
| LPar
| RPar
-- Lists
| Comma
| LBrack
| RBrack
-- Types
| Colon
| Arrow
-- EOF
| EOF
deriving (Eq, Show)
mkRange :: AlexInput -> Int64 -> Range
mkRange (start, _, str, _) len = Range{start = start, stop = stop}
where
stop = BS.foldl' alexMove start $ BS.take len str
tok :: Token -> AlexAction RangedToken
tok ctor inp len =
pure RangedToken
{ rtToken = ctor
, rtRange = mkRange inp len
}
tokId :: AlexAction RangedToken
tokId inp@(_, _, str, _) len =
pure RangedToken
{ rtToken = Identifier $ BS.take len str
, rtRange = mkRange inp len
}
tokInteger :: AlexAction RangedToken
tokInteger inp@(_, _, str, _) len =
pure RangedToken
{ rtToken = Integer $ read $ BS.unpack $ BS.take len str
, rtRange = mkRange inp len
}
tokString :: AlexAction RangedToken
tokString inp@(_, _, str, _) len =
pure RangedToken
{ rtToken = String $ BS.take len str
, rtRange = mkRange inp len
}
nestComment, unnestComment :: AlexAction RangedToken
nestComment input len = do
modify $ \s -> s{nestLevel = nestLevel s + 1}
skip input len
unnestComment input len = do
state <- get
let level = nestLevel state - 1
put state{nestLevel = level}
when (level == 0) $
alexSetStartCode 0
skip input len
scanMany :: ByteString -> Either String [RangedToken]
scanMany input = runAlex input go
where
go = do
output <- alexMonadScan
if rtToken output == EOF
then pure [output]
else (output :) <$> go
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment