Skip to content

Instantly share code, notes, and snippets.

@heitor-lassarote
Last active September 21, 2022 15:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save heitor-lassarote/9cc4203990352f5df5f6b84da9291df9 to your computer and use it in GitHub Desktop.
Save heitor-lassarote/9cc4203990352f5df5f6b84da9291df9 to your computer and use it in GitHub Desktop.
Lexer.x and Parser.y: Solutions to exercises 1, 2, and 3
{
module Lexer
( -- * Invoking Alex
Alex
, AlexPosn (..)
, alexGetInput
, alexError
, runAlex
, alexMonadScan
, Range (..)
, RangedToken (..)
, Token (..)
, scanMany
) where
import Control.Monad (when)
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS
import Data.Scientific (Scientific)
}
%wrapper "monadUserState-bytestring"
$digit = [0-9]
$alpha = [a-zA-Z]
@id = ($alpha | \_) ($alpha | $digit | \_ | \' | \?)*
@exponent = e (\- | \+)? $digit+
tokens :-
<0> $white+ ;
<0> "(*" { nestComment `andBegin` comment }
<0> "*)" { \_ _ -> alexError "Error: unexpected closing comment" }
<comment> "(*" { nestComment }
<comment> "*)" { unnestComment }
<comment> . ;
<comment> \n ;
-- Note that . doesn't match newlines, which is what we want.
<0> "#" .* ;
-- Keywords
<0> let { tok Let }
<0> in { tok In }
<0> if { tok If }
<0> then { tok Then }
<0> else { tok Else }
-- Arithmetic operators
<0> "+" { tok Plus }
<0> "-" { tok Minus }
<0> "*" { tok Times }
<0> "/" { tok Divide }
-- Comparison operators
<0> "=" { tok Eq }
<0> "<>" { tok Neq }
<0> "<" { tok Lt }
<0> "<=" { tok Le }
<0> ">" { tok Gt }
<0> ">=" { tok Ge }
-- Logical operators
<0> "&" { tok And }
<0> "|" { tok Or }
-- Parenthesis
<0> "(" { tok LPar }
<0> ")" { tok RPar }
-- Lists
<0> "[" { tok LBrack }
<0> "]" { tok RBrack }
<0> "," { tok Comma }
-- Types
<0> ":" { tok Colon }
<0> "->" { tok Arrow }
-- Projection
<0> \. { tok Dot }
-- Exercise 3 bonus:
-- Pattern matching
<0> match { tok Match }
<0> with { tok With }
-- Identifiers
<0> @id { tokId }
-- Constants
-- Exercise 1:
<0> $digit+ (\. $digit+)? @exponent? { tokNumber }
<0> \" { enterString `andBegin` string }
<string> \" { exitString `andBegin` 0 }
<string> \\\\ { emit '\\' }
<string> \\\" { emit '"' }
<string> \\n { emit '\n' }
<string> \\t { emit '\t' }
<string> . { emitCurrent }
{
data AlexUserState = AlexUserState
{ nestLevel :: Int
, strStart :: AlexPosn
, strBuffer :: [Char]
}
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState
{ nestLevel = 0
, strStart = AlexPn 0 0 0
, strBuffer = []
}
get :: Alex AlexUserState
get = Alex $ \s -> Right (s, alex_ust s)
put :: AlexUserState -> Alex ()
put s' = Alex $ \s -> Right (s{alex_ust = s'}, ())
modify :: (AlexUserState -> AlexUserState) -> Alex ()
modify f = Alex $ \s -> Right (s{alex_ust = f (alex_ust s)}, ())
alexEOF :: Alex RangedToken
alexEOF = do
(pos, _, _, _) <- alexGetInput
startCode <- alexGetStartCode
when (startCode == comment) $
alexError "Error: unclosed comment"
when (startCode == string) $
alexError "Error: unclosed string"
pure $ RangedToken EOF (Range pos pos)
data Range = Range
{ start :: AlexPosn
, stop :: AlexPosn
} deriving (Eq, Show)
data RangedToken = RangedToken
{ rtToken :: Token
, rtRange :: Range
} deriving (Eq, Show)
data Token
-- Identifiers
= Identifier ByteString
-- Constants
| String ByteString
| Number Scientific
-- Keywords
| Let
| In
| If
| Then
| Else
-- Arithmetic operators
| Plus
| Minus
| Times
| Divide
-- Comparison operators
| Eq
| Neq
| Lt
| Le
| Gt
| Ge
-- Logical operators
| And
| Or
-- Parenthesis
| LPar
| RPar
-- Lists
| Comma
| LBrack
| RBrack
-- Types
| Colon
| Arrow
| VBar
-- Projection
| Dot
-- Patern matching
| Match
| With
-- EOF
| EOF
deriving (Eq, Show)
mkRange :: AlexInput -> Int64 -> Range
mkRange (start, _, str, _) len = Range{start = start, stop = stop}
where
stop = BS.foldl' alexMove start $ BS.take len str
tok :: Token -> AlexAction RangedToken
tok ctor inp len =
pure RangedToken
{ rtToken = ctor
, rtRange = mkRange inp len
}
tokId, tokNumber :: AlexAction RangedToken
tokId inp@(_, _, str, _) len =
pure RangedToken
{ rtToken = Identifier $ BS.take len str
, rtRange = mkRange inp len
}
tokNumber inp@(_ ,_, str, _) len =
pure RangedToken
{ rtToken = Number $ read $ BS.unpack $ BS.take len str
, rtRange = mkRange inp len
}
enterString, exitString :: AlexAction RangedToken
enterString inp@(pos, _, _, _) len = do
modify $ \s -> s{strStart = pos, strBuffer = '"' : strBuffer s}
skip inp len
exitString inp@(pos, _, _, _) len = do
s <- get
put s{strStart = AlexPn 0 0 0, strBuffer = []}
pure RangedToken
{ rtToken = String $ BS.pack $ reverse $ '"' : strBuffer s
, rtRange = Range (strStart s) (alexMove pos '"')
}
emit :: Char -> AlexAction RangedToken
emit c inp@(_, _, str, _) len = do
modify $ \s -> s{strBuffer = c : strBuffer s}
skip inp len
emitCurrent :: AlexAction RangedToken
emitCurrent inp@(_, _, str, _) len = do
modify $ \s -> s{strBuffer = BS.head str : strBuffer s}
skip inp len
nestComment, unnestComment :: AlexAction RangedToken
nestComment input len = do
modify $ \s -> s{nestLevel = nestLevel s + 1}
skip input len
unnestComment input len = do
state <- get
let level = nestLevel state - 1
put state{nestLevel = level}
when (level == 0) $
alexSetStartCode 0
skip input len
scanMany :: ByteString -> Either String [RangedToken]
scanMany input = runAlex input go
where
go = do
output <- alexMonadScan
if rtToken output == EOF
then pure [output]
else (output :) <$> go
}
{
{-# LANGUAGE DeriveFoldable #-}
module Parser
( parseMiniML
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Maybe (fromJust)
import Data.Monoid (First (..))
import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NE
import Data.Scientific (Scientific)
import qualified Lexer as L
}
%name parseMiniML decs
%tokentype { L.RangedToken }
%error { parseError }
%monad { L.Alex } { >>= } { pure }
%lexer { lexer } { L.RangedToken L.EOF _ }
%expect 0
%token
-- Identifiers
identifier { L.RangedToken (L.Identifier _) _ }
-- Constants
string { L.RangedToken (L.String _) _ }
number { L.RangedToken (L.Number _) _ }
-- Keywords
let { L.RangedToken L.Let _ }
in { L.RangedToken L.In _ }
if { L.RangedToken L.If _ }
then { L.RangedToken L.Then _ }
else { L.RangedToken L.Else _ }
-- Arithmetic operators
'+' { L.RangedToken L.Plus _ }
'-' { L.RangedToken L.Minus _ }
'*' { L.RangedToken L.Times _ }
'/' { L.RangedToken L.Divide _ }
-- Comparison operators
'=' { L.RangedToken L.Eq _ }
'<>' { L.RangedToken L.Neq _ }
'<' { L.RangedToken L.Lt _ }
'<=' { L.RangedToken L.Le _ }
'>' { L.RangedToken L.Gt _ }
'>=' { L.RangedToken L.Ge _ }
-- Logical operators
'&' { L.RangedToken L.And _ }
'|' { L.RangedToken L.Or _ }
-- Parenthesis
'(' { L.RangedToken L.LPar _ }
')' { L.RangedToken L.RPar _ }
-- Lists
'[' { L.RangedToken L.LBrack _ }
']' { L.RangedToken L.RBrack _ }
',' { L.RangedToken L.Comma _ }
-- Types
':' { L.RangedToken L.Colon _ }
'->' { L.RangedToken L.Arrow _ }
-- Projection
'.' { L.RangedToken L.Dot _ }
-- Pattern matching
match { L.RangedToken L.Match _ }
with { L.RangedToken L.With _ }
%right else in
%right '->'
%left '|'
%left '&'
%nonassoc '=' '<>' '<' '>' '<=' '>='
%left '+' '-'
%left '*' '/'
%%
optional(p)
: { Nothing }
| p { Just $1 }
many_rev(p)
: { [] }
| many_rev(p) p { $2 : $1 }
many(p)
: many_rev(p) { reverse $1 }
sepBy_rev(p, sep)
: { [] }
| sepBy_rev(p, sep) sep p { $3 : $1 }
sepBy(p, sep)
: sepBy_rev(p, sep) { reverse $1 }
name :: { Name L.Range }
: identifier { unTok $1 (\range (L.Identifier name) -> Name range name) }
type :: { Type L.Range }
: name { TVar (info $1) $1 }
| '(' ')' { TUnit (L.rtRange $1 <-> L.rtRange $2) }
| '(' type ')' { TPar (L.rtRange $1 <-> L.rtRange $3) $2 }
| '[' type ']' { TList (L.rtRange $1 <-> L.rtRange $3) $2 }
| type '->' type { TArrow (info $1 <-> info $3) $1 $3 }
typeAnnotation :: { Type L.Range }
: ':' type { $2 }
dec :: { Dec L.Range }
: let name many(pat) optional(typeAnnotation) '=' exp { Dec (L.rtRange $1 <-> info $6) $2 $3 $4 $6 }
decs :: { [Dec L.Range] }
: many(dec) { $1 }
exp :: { Exp L.Range }
: expapp { $1 }
| expcond { $1 }
| '-' exp { ENeg (L.rtRange $1 <-> info $2) $2 }
-- Arithmetic operators
| exp '+' exp { EBinOp (info $1 <-> info $3) $1 (Plus (L.rtRange $2)) $3 }
| exp '-' exp { EBinOp (info $1 <-> info $3) $1 (Minus (L.rtRange $2)) $3 }
| exp '*' exp { EBinOp (info $1 <-> info $3) $1 (Times (L.rtRange $2)) $3 }
| exp '/' exp { EBinOp (info $1 <-> info $3) $1 (Divide (L.rtRange $2)) $3 }
-- Comparison operators
| exp '=' exp { EBinOp (info $1 <-> info $3) $1 (Eq (L.rtRange $2)) $3 }
| exp '<>' exp { EBinOp (info $1 <-> info $3) $1 (Neq (L.rtRange $2)) $3 }
| exp '<' exp { EBinOp (info $1 <-> info $3) $1 (Lt (L.rtRange $2)) $3 }
| exp '<=' exp { EBinOp (info $1 <-> info $3) $1 (Le (L.rtRange $2)) $3 }
| exp '>' exp { EBinOp (info $1 <-> info $3) $1 (Gt (L.rtRange $2)) $3 }
| exp '>=' exp { EBinOp (info $1 <-> info $3) $1 (Ge (L.rtRange $2)) $3 }
-- Logical operators
| exp '&' exp { EBinOp (info $1 <-> info $3) $1 (And (L.rtRange $2)) $3 }
| exp '|' exp { EBinOp (info $1 <-> info $3) $1 (Or (L.rtRange $2)) $3 }
| dec in exp { ELetIn (info $1 <-> info $3) $1 $3 }
-- Exercise 3 bonus:
-- Note: Suppose we have this:
-- match 0 with | 0 -> match 1 with | 1 -> 1 | 2 -> 2
-- Shifting will cause it to parse like so:
-- match 0 with | 0 -> (match 1 with | 1 -> 1 | 2 -> 2)
-- Reducing would give:
-- (match 0 with | 0 -> match 1 with | 1 -> 1) | 2 -> 2
| match exp with many_rev(case) %shift
{
let
cases = reverse $4
endPos = maybe (L.rtRange $3) (info . NE.last) (NE.nonEmpty cases)
in
EMatch (L.rtRange $1 <-> endPos) $2 cases
}
case :: { Case L.Range }
: '|' pat '->' exp { Case (L.rtRange $1 <-> info $4) $2 $4 }
expapp :: { Exp L.Range }
: expapp projatom { EApp (info $1 <-> info $2) $1 $2 }
| projatom { $1 }
expcond :: { Exp L.Range }
: if exp then exp %shift { EIfThen (L.rtRange $1 <-> info $4) $2 $4 }
| if exp then exp else exp { EIfThenElse (L.rtRange $1 <-> info $6) $2 $4 $6 }
-- Exercise 2:
projatom :: { Exp L.Range }
: atom many(projection)
{ maybe $1 (\projs -> EProj (info $1 <-> info (NE.last projs)) $1 projs) (NE.nonEmpty $2) }
atom :: { Exp L.Range }
: number { unTok $1 (\range (L.Number num) -> ENumber range num) }
| name { EVar (info $1) $1 }
| string { unTok $1 (\range (L.String string) -> EString range string) }
| '(' ')' { EUnit (L.rtRange $1 <-> L.rtRange $2) }
| '[' sepBy(exp, ',') ']' { EList (L.rtRange $1 <-> L.rtRange $3) $2 }
| '(' exp ')' { EPar (L.rtRange $1 <-> L.rtRange $3) $2 }
-- Arithmetic operators
| '(' '+' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Plus (L.rtRange $2)) }
| '(' '-' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Minus (L.rtRange $2)) }
| '(' '*' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Times (L.rtRange $2)) }
| '(' '/' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Divide (L.rtRange $2)) }
-- Comparison operators
| '(' '=' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Eq (L.rtRange $2)) }
| '(' '<>' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Neq (L.rtRange $2)) }
| '(' '<' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Lt (L.rtRange $2)) }
| '(' '<=' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Le (L.rtRange $2)) }
| '(' '>' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Gt (L.rtRange $2)) }
| '(' '>=' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Ge (L.rtRange $2)) }
-- Logical operators
| '(' '&' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (And (L.rtRange $2)) }
| '(' '|' ')' { EOp (L.rtRange $1 <-> L.rtRange $3) (Or (L.rtRange $2)) }
-- Exercise 3:
pat :: { Pat L.Range }
: number { unTok $1 (\range (L.Number num) -> PNumber range num) }
| name { PVar (info $1) $1 }
| string { unTok $1 (\range (L.String string) -> PString range string) }
| '(' ')' { PUnit (L.rtRange $1 <-> L.rtRange $2) }
| '(' pat ')' { PPar (L.rtRange $1 <-> L.rtRange $3) $2 }
| '(' pat typeAnnotation ')' { PAnnot (L.rtRange $1 <-> L.rtRange $4) $2 $3 }
| '[' sepBy(pat, ',') ']' { PList (L.rtRange $1 <-> L.rtRange $3) $2 }
projection :: { Proj L.Range }
: '.' '(' exp ')' { ProjList (L.rtRange $1 <-> L.rtRange $4) $3 }
{
parseError :: L.RangedToken -> L.Alex a
parseError _ = do
(L.AlexPn _ line column, _, _, _) <- L.alexGetInput
L.alexError $ "Parse error at line " <> show line <> ", column " <> show column
lexer :: (L.RangedToken -> L.Alex a) -> L.Alex a
lexer = (=<< L.alexMonadScan)
-- | Build a simple node by extracting its token type and range.
unTok :: L.RangedToken -> (L.Range -> L.Token -> a) -> a
unTok (L.RangedToken tok range) ctor = ctor range tok
-- | Unsafely extracts the the metainformation field of a node.
info :: Foldable f => f a -> a
info = fromJust . getFirst . foldMap pure
-- | Performs the union of two ranges by creating a new range starting at the
-- start position of the first range, and stopping at the stop position of the
-- second range.
-- Invariant: The LHS range starts before the RHS range.
(<->) :: L.Range -> L.Range -> L.Range
L.Range a1 _ <-> L.Range _ b2 = L.Range a1 b2
-- * AST
data Name a
= Name a ByteString
deriving (Foldable, Show)
data Type a
= TVar a (Name a)
| TPar a (Type a)
| TUnit a
| TList a (Type a)
| TArrow a (Type a) (Type a)
deriving (Foldable, Show)
data Dec a
= Dec a (Name a) [Pat a] (Maybe (Type a)) (Exp a)
deriving (Foldable, Show)
data Operator a
= Plus a
| Minus a
| Times a
| Divide a
| Eq a
| Neq a
| Lt a
| Le a
| Gt a
| Ge a
| And a
| Or a
deriving (Foldable, Show)
data Proj a
= ProjList a (Exp a)
deriving (Foldable, Show)
data Case a
= Case a (Pat a) (Exp a)
deriving (Foldable, Show)
data Exp a
= ENumber a Scientific
| EVar a (Name a)
| EString a ByteString
| EUnit a
| EList a [Exp a]
| EPar a (Exp a)
| EApp a (Exp a) (Exp a)
| EIfThen a (Exp a) (Exp a)
| EIfThenElse a (Exp a) (Exp a) (Exp a)
| ENeg a (Exp a)
| EBinOp a (Exp a) (Operator a) (Exp a)
| EOp a (Operator a)
| ELetIn a (Dec a) (Exp a)
| EProj a (Exp a) (NonEmpty (Proj a))
| EMatch a (Exp a) [Case a]
deriving (Foldable, Show)
data Pat a
= PNumber a Scientific
| PVar a (Name a)
| PString a ByteString
| PUnit a
| PPar a (Pat a)
| PAnnot a (Pat a) (Type a)
| PList a [Pat a]
deriving (Foldable, Show)
}
@heitor-lassarote
Copy link
Author

@Kleidukos, no problem. I've made an edit to the article warning about StrictData that hopefully will be merged soon, as it seems to be a common pitfall. I also did the same thing and got confused about the error in the past, hence I asked. :) One of my friends also fell into the same pitfall.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment