Skip to content

Instantly share code, notes, and snippets.

@freddi301
Created July 16, 2024 04:56
Show Gist options
  • Save freddi301/1e3eae77e8bb9e54070ca7a392c7ad64 to your computer and use it in GitHub Desktop.
Save freddi301/1e3eae77e8bb9e54070ca7a392c7ad64 to your computer and use it in GitHub Desktop.
Idris 2 Parser based on combinators
module Parser
import Data.Nat
import Data.List
%default total
-- interface Codec input output where
-- encode : input -> output
-- decode : output -> Maybe input
-- EncodeDecode : (x : input) -> decode (encode x) = Just x
public export
data Outcome : Type -> Type -> Type where
Failure : error -> (position : Nat) -> Outcome error output
Success : output -> (consumed : Nat) -> Outcome error output
export
Eq error => Eq output =>
Eq (Outcome error output) where
(==) (Failure reason position) (Failure reason' position') = reason == reason' && position == position'
(==) (Success result consumed) (Success result' consumed') = result == result' && consumed == consumed'
(==) _ _ = False
export
Show error => Show output =>
Show (Outcome error output) where
show (Failure reason position) = "Failure (" ++ show reason ++ ") " ++ show position
show (Success result consumed) = "Success (" ++ show result ++ ") " ++ show consumed
namespace Outcome
public export
(>>=) : Outcome error output_x -> (output_x -> Nat -> Outcome error output_y) -> Outcome error output_y
Failure reason position >>= binder = Failure reason position
Success result consumed >>= binder = binder result consumed
export
record Parser input error output where
constructor MakeParser
run : List input -> Outcome error output
public export interface ParserErrorEndOfInputExpected error where endOfInputExpected : error
export
parse :
ParserErrorEndOfInputExpected error =>
Parser input error output -> List input -> Outcome error output
parse parser tokens = parser.run tokens >>= \result, consumed =>
if consumed == length tokens then Success result consumed else Failure endOfInputExpected consumed
--- Basics
export
fail : error -> Parser input error output
fail reason = MakeParser $ \tokens =>
Failure reason Z
export
yield : output -> Parser input error output
yield result = MakeParser $ \tokens =>
Success result Z
public export interface ParserErrorUnexpectedEndOfInput error where unexpectedEndOfInput : error
export
take :
ParserErrorUnexpectedEndOfInput error =>
Parser input error input
take = MakeParser $ \tokens => case tokens of
[] => Failure unexpectedEndOfInput Z
token :: tokens => Success token (S Z)
export
(>>=) : Parser input error output_x -> (output_x -> Parser input error output_y) -> Parser input error output_y
(>>=) parser binder = MakeParser $ \tokens =>
parser.run tokens >>= \result_parser, consumed_parser =>
case (binder result_parser).run (drop consumed_parser tokens) of
Failure reason position => Failure reason (consumed_parser + position)
Success result_binder consumed_binder => Success result_binder (consumed_parser + consumed_binder)
public export interface ParserErrorNoAlternative error where noAlternative : (first : (error, Nat)) -> (second : (error, Nat)) -> error
export
(<|>) :
ParserErrorNoAlternative error =>
Parser input error output -> Parser input error output -> Parser input error output
(<|>) first second = MakeParser $ \tokens =>
case first.run tokens of
Failure reason_first position_first => case second.run tokens of
Failure reason_second position_second => Failure (noAlternative (reason_first, position_first) (reason_second, position_second)) Z
outcome => outcome
outcome => outcome
repeatRec : (max : Maybe Nat) -> Parser input error output -> Nat -> List input -> (List output, Nat)
repeatRec (Just Z) parser p tokens = ([], 0)
repeatRec max parser (S Z) (token :: tokens) = case parser.run tokens of
Failure reason position => ([], 0)
Success result consumed_head =>
let (results, consumed_tail) = repeatRec (pred <$> max) parser consumed_head tokens in
(result :: results, consumed_head + consumed_tail)
repeatRec max parser (S p) (token :: tokens) = repeatRec max parser p tokens
repeatRec max parser p tokens = ([], 0)
export
repeat :
{default Nothing max : Maybe Nat} ->
Parser input error output -> Parser input error (List output)
repeat {max = Just Z} parser = yield []
repeat {max} parser = MakeParser $ \tokens => case parser.run tokens of
Failure reason position => Success [] 0
Success result consumed_head =>
let (results, consumed_tail) = repeatRec (pred <$> max) parser consumed_head tokens in
Success (result :: results) (consumed_head + consumed_tail)
--- Utils
export
(>>) : Parser input error output_x -> Parser input error output_y -> Parser input error output_y
first >> second = first >>= const second
export infixl 1 <<
export
(<<) : Parser input error output_x -> Parser input error output_y -> Parser input error output_x
first << second = first >>= \result => second >>= const (yield result)
export
(<$>) : (output_x -> output_y) -> Parser input error output_x -> Parser input error output_y
mapper <$> parser = parser >>= \result => yield $ mapper result
export
map : (output_x -> output_y) -> Parser input error output_x -> Parser input error output_y
map = (<$>)
export
sequence : List (Parser input error output) -> Parser input error (List output)
sequence [] = yield []
sequence (parser :: parsers) = parser >>= \result => sequence parsers >>= \results => yield (result :: results)
public export interface ParserErrorNotMatched error input where notMatched : (token : input) -> (expected : input) -> error
export
match :
ParserErrorUnexpectedEndOfInput error => ParserErrorNotMatched error input =>
Eq input => input -> Parser input error input
match expected = MakeParser $ \tokens => case tokens of
[] => Failure unexpectedEndOfInput Z
(token :: tokens) => if token == expected then Success token (S Z) else Failure (notMatched token expected) Z
export
matches :
ParserErrorUnexpectedEndOfInput error => ParserErrorNotMatched error Char =>
String -> Parser Char error String
matches expected = map pack $ sequence $ map Parser.match $ unpack expected
public export interface ParserErrorNotSatisfied error input where notSatisfied : (token : input) -> (predicate : String) -> error
export
satisfy :
ParserErrorUnexpectedEndOfInput error => ParserErrorNotSatisfied error input => {default "" name: String} ->
(input -> Bool) -> Parser input error input
satisfy predicate = MakeParser $ \tokens => case tokens of
[] => Failure unexpectedEndOfInput Z
(token :: tokens) => if predicate token then Success token (S Z) else Failure (notSatisfied token name) Z
namespace Satisfy
export
(||) : (input -> Bool) -> (input -> Bool) -> (input -> Bool)
(||) predicate_a predicate_b = \token => predicate_a token || predicate_b token
export
optional :
ParserErrorNoAlternative error =>
Parser input error output -> Parser input error (Maybe output)
optional parser = (Just <$> parser) <|> (yield Nothing)
export
separatedBy :
ParserErrorNoAlternative error =>
Parser input error _ -> Parser input error output -> Parser input error (List output)
separatedBy separator parser = parser >>= \result => repeat (separator >> parser) >>= \results => yield (result :: results)
public export interface ParserErrorNotEnoughItems error where notEnoughItems : (actual : Nat) -> (parser : String) -> (min : Nat) -> error
export
min :
ParserErrorNotEnoughItems error => {default "" name : String} ->
Nat -> Parser input error (List output) -> Parser input error (List output)
min n parser = parser >>= \results => if length results >= n then yield results else fail $ notEnoughItems (length results) name n
public export interface ParserErrorRecursionLimit error where recursionLimit : error
export
recursive :
ParserErrorRecursionLimit error =>
Nat -> (Parser input error output -> Parser input error output) -> Parser input error output
recursive Z factory = factory $ fail recursionLimit
recursive (S p) factory = factory (recursive p factory)
--- Test
data TestsError : Type -> Type where
EndOfInputExpected : TestsError input
UnexpectedEndOfInput : TestsError input
NoAlternative : (first : (TestsError input, Nat)) -> (second : (TestsError input, Nat)) -> TestsError input
NotMatched : (token : input) -> (expected : input) -> TestsError input
NotSatisfied : (token : input) -> (predicate : String) -> TestsError input
RecursionLimit : TestsError input
NotEnoughItems : (actual : Nat) -> (parser : String) -> (min : Nat) -> TestsError input
Custom : TestsError input
ParserErrorEndOfInputExpected (TestsError input) where endOfInputExpected = EndOfInputExpected
ParserErrorUnexpectedEndOfInput (TestsError input) where unexpectedEndOfInput = UnexpectedEndOfInput
ParserErrorNoAlternative (TestsError input) where noAlternative = NoAlternative
ParserErrorNotMatched (TestsError input) input where notMatched = NotMatched
ParserErrorNotSatisfied (TestsError input) input where notSatisfied = NotSatisfied
ParserErrorNotEnoughItems (TestsError input) where notEnoughItems = NotEnoughItems
ParserErrorRecursionLimit (TestsError input) where recursionLimit = RecursionLimit
Test_1_parser : Parser Char (TestsError Char) (List Char)
Test_1_parser = repeat take
Test_1_result : Outcome (TestsError Char) (List Char)
Test_1_result = parse Test_1_parser (unpack "abc")
Test_1 : Test_1_result = Success (unpack "abc") 3
Test_1 = Refl
Test_2_parser : Parser Char (TestsError Char) (List String)
Test_2_parser = repeat (take >>= \a => take >>= \b => yield $ pack [a, b])
Test_2_result : Outcome (TestsError Char) (List String)
Test_2_result = parse Test_2_parser (unpack "abcd")
Test_2 : Test_2_result = Success ["ab", "cd"] 4
Test_2 = Refl
Test_3_parser : Parser Char (TestsError Char) (List (List Char))
Test_3_parser = repeat $ repeat $ take
Test_3_result : Outcome (TestsError Char) (List (List Char))
Test_3_result = parse Test_3_parser (unpack "abc")
Test_3 : Test_3_result = Success [unpack "abc", []] 3
Test_3 = Refl
Test_4_parser : Parser Char (TestsError Char) String
Test_4_parser = pack . (:: []) <$> take
Test_4_result : Outcome (TestsError Char) String
Test_4_result = parse Test_4_parser (unpack "a")
Test_4 : Test_4_result = Success "a" 1
Test_4 = Refl
Test_5_parser : Parser Char (TestsError Char) (List String)
Test_5_parser = separatedBy ((repeat $ satisfy isSpace) >> match ',' >> (repeat $ satisfy isSpace)) (pack <$> repeat (satisfy isAlpha))
Test_5_result : Outcome (TestsError Char) (List String)
Test_5_result = parse Test_5_parser (unpack "one, two, three , four")
Test_5 : Test_5_result = Success ["one", "two", "three", "four"] 23
Test_5 = Refl
Test_6_parser : Parser Char (TestsError Char) Char
Test_6_parser = satisfy {name="isAlpha"} isAlpha
Test_6_result_1 : Outcome (TestsError Char) Char
Test_6_result_1 = parse Test_6_parser (unpack "a")
Test_6_1 : Test_6_result_1 = Success 'a' 1
Test_6_1 = Refl
Test_6_result_2 : Outcome (TestsError Char) (Char)
Test_6_result_2 = parse Test_6_parser (unpack "4")
Test_6_2 : Test_6_result_2 = Failure (notSatisfied '4' "isAlpha") 0
Test_6_2 = Refl
data Test7Tree : Type -> Type where
Test7Leaf : value -> Test7Tree value
Test7Branch : Test7Tree value -> Test7Tree value -> Test7Tree value
Test7ParserType = Parser Char (TestsError Char) (Test7Tree String)
parameters (Test_7_parser_recursive : Test7ParserType)
mutual
Test_7_parser_not_recursive : Test7ParserType
Test_7_parser_not_recursive = Test_7_parser_branch <|> Test_7_parser_leaf
Test_7_parser_leaf : Test7ParserType
Test_7_parser_leaf = Test7Leaf <$> pack <$> repeat (satisfy isAlpha)
Test_7_parser_branch : Test7ParserType
Test_7_parser_branch = do
match '('
left <- Test_7_parser_recursive
match ' '
right <- Test_7_parser_recursive
match ')'
yield $ Test7Branch left right
Test_7_parser : Test7ParserType
Test_7_parser = recursive 2 Test_7_parser_not_recursive
Test_7_result : Outcome (TestsError Char) (Test7Tree String)
Test_7_result = parse Test_7_parser (unpack "(a (b c))")
Test_7 : Test_7_result = Success (Test7Branch (Test7Leaf "a") (Test7Branch (Test7Leaf "b") (Test7Leaf "c"))) 9
Test_7 = Refl
Test_8_parser : Parser Char (TestsError Char) String
Test_8_parser = take >>= \a => take >>= \b => yield $ pack [a, b]
Test_8_result_1 : Outcome (TestsError Char) String
Test_8_result_1 = parse Test_8_parser (unpack "ab")
Test_8_1 : Test_8_result_1 = Success "ab" 2
Test_8_1 = Refl
Test_8_result_2 : Outcome (TestsError Char) String
Test_8_result_2 = parse Test_8_parser (unpack "abc")
Test_8_2 : Test_8_result_2 = Failure EndOfInputExpected 2
Test_8_2 = Refl
Test_8_result_3 : Outcome (TestsError Char) String
Test_8_result_3 = parse Test_8_parser (unpack "a")
Test_8_3 : Test_8_result_3 = Failure UnexpectedEndOfInput 1
Test_8_3 = Refl
Test_9_parser : Parser Char (TestsError Char) String
Test_9_parser = take >>= \a => fail $ Custom
Test_9_result_1 : Outcome (TestsError Char) String
Test_9_result_1 = parse Test_9_parser (unpack "a")
Test_9_1 : Test_9_result_1 = Failure Custom 1
Test_9_1 = Refl
Test_9_result_2 : Outcome (TestsError Char) String
Test_9_result_2 = parse Test_9_parser (unpack "ab")
Test_9_2 : Test_9_result_2 = Failure Custom 1
Test_9_2 = Refl
Test_10_parser : Parser Char (TestsError Char) (Maybe Char)
Test_10_parser = (match 'a') >> optional (match 'b')
Test_10_result_1 : Outcome (TestsError Char) (Maybe Char)
Test_10_result_1 = parse Test_10_parser (unpack "a")
Test_10_1 : Test_10_result_1 = Success Nothing 1
Test_10_1 = Refl
Test_10_result_2 : Outcome (TestsError Char) (Maybe Char)
Test_10_result_2 = parse Test_10_parser (unpack "ab")
Test_10_2 : Test_10_result_2 = Success (Just 'b') 2
Test_10_2 = Refl
Test_11_parser : Parser Char (TestsError Char) (List String)
Test_11_parser = min 1 $ repeat $ ((repeat $ satisfy isSpace) >> (map pack $ min 1 $ repeat $ satisfy isAlpha))
Test_11_result_1 : Outcome (TestsError Char) (List String)
Test_11_result_1 = parse Test_11_parser (unpack "a bb ccc")
Test_11_1 : Test_11_result_1 = Success ["a", "bb", "ccc"] 9
Test_11_1 = Refl
Test_12_parser : Parser Nat (TestsError Nat) (List Nat)
Test_12_parser = repeat $ map sum $ min 1 $ repeat {max = Just 2} take
Test_12_result : Outcome (TestsError Nat) (List Nat)
Test_12_result = parse Test_12_parser [1, 2, 3, 4, 5, 6]
Test_12 : Test_12_result = Success [3, 7, 11] 6
Test_12 = Refl
Test_13_parser : Parser Char (TestsError Char) (List String)
Test_13_parser = repeat $ do
item <- map pack $ repeat $ satisfy isAlpha
match ','
yield item
Test_13_result : Outcome (TestsError Char) (List String)
Test_13_result = parse Test_13_parser (unpack "aa,bb,cc,")
test_13 : Test_13_result = Success ["aa", "bb", "cc"] 9
test_13 = Refl
data Test_14_Tree : Type where
Leaf : String -> Test_14_Tree
Branch : List Test_14_Tree -> Test_14_Tree
Test_14_parser : Parser Char (TestsError Char) Test_14_Tree
Test_14_parser = recursive 2 $ \recParser =>
(do match '('; items <- separatedBy ((repeat $ satisfy isSpace) >> (match ',') >> (repeat $ satisfy isSpace)) recParser; match ')'; yield $ Branch items) <|>
(do chars <- min 1 $ repeat $ satisfy isAlpha; yield $ Leaf $ pack chars)
Test_14_result : Outcome (TestsError Char) Test_14_Tree
Test_14_result = parse Test_14_parser (unpack "(a, (b, c), d)")
Test_14 : Test_14_result = Success (Branch [Leaf "a", Branch [Leaf "b", Leaf "c"], Leaf "d"]) 14
Test_14 = Refl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment