Last active
June 2, 2024 03:16
-
-
Save evanrelf/8645a7cb3d8febf216ab0037b7c83b34 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- https://www.youtube.com/watch?v=dDtZLm7HIJs | |
{-# LANGUAGE BlockArguments #-} | |
module Main (main) where | |
import Control.Applicative | |
import Control.Monad (when) | |
import Data.Char | |
import Data.Foldable (for_) | |
import System.Exit (die) | |
main :: IO () | |
main = do | |
let examples = | |
[ parse digit "123" ==> [('1', "23")] | |
, parse digit "abc" ==> [] | |
, parse (char 'a') "abc" ==> [('a', "bc")] | |
, parse (char 'a') "123" ==> [] | |
, parse (some digit) "123" ==> [("123", "")] | |
, parse (digit <|> letter) "abc123" ==> [('a', "bc123")] | |
, parse (some (digit <|> letter)) "abc123" ==> [("abc123", "")] | |
, parse expr "2+3*4" ==> [(14, "")] | |
, parse expr "(2+3)*4" ==> [(20,"")] | |
, parse expr "(2+(7*10)+8)*20" ==> [(1600, "")] | |
, parse expr "2+3*" ==> [(5, "*")] | |
, parse expr "(2+3" ==> [] | |
] | |
for_ examples \(Example actual expected) -> do | |
if actual == expected | |
then putStrLn $ "PASS " <> show actual | |
else putStrLn $ "FAIL expected: " <> show expected <> ", actual: " <> show actual | |
expr :: Parser Int | |
expr = | |
do | |
x <- term | |
char '+' | |
y <- expr | |
pure (x + y) | |
<|> term | |
term :: Parser Int | |
term = | |
do | |
x <- factor | |
char '*' | |
y <- term | |
pure (x * y) | |
<|> factor | |
factor :: Parser Int | |
factor = | |
do | |
char '(' | |
x <- expr | |
char ')' | |
pure x | |
<|> int | |
data Example = forall a. (Eq a, Show a) => Example a a | |
(==>) :: (Eq a, Show a) => a -> a -> Example | |
(==>) = Example | |
-------------------------------------------------------------------------------- | |
-- https://www.cs.nott.ac.uk/~pszgmh/Parsing.hs | |
-------------------------------------------------------------------------------- | |
-- Functional parsing library from chapter 13 of Programming in Haskell, | |
-- Graham Hutton, Cambridge University Press, 2016. | |
-- Basic definitions | |
newtype Parser a = P (String -> [(a,String)]) | |
parse :: Parser a -> String -> [(a,String)] | |
parse (P p) inp = p inp | |
item :: Parser Char | |
item = P (\inp -> case inp of | |
[] -> [] | |
(x:xs) -> [(x,xs)]) | |
-- Sequencing parsers | |
instance Functor Parser where | |
-- fmap :: (a -> b) -> Parser a -> Parser b | |
fmap g p = P (\inp -> case parse p inp of | |
[] -> [] | |
[(v,out)] -> [(g v, out)]) | |
instance Applicative Parser where | |
-- pure :: a -> Parser a | |
pure v = P (\inp -> [(v,inp)]) | |
-- <*> :: Parser (a -> b) -> Parser a -> Parser b | |
pg <*> px = P (\inp -> case parse pg inp of | |
[] -> [] | |
[(g,out)] -> parse (fmap g px) out) | |
instance Monad Parser where | |
-- (>>=) :: Parser a -> (a -> Parser b) -> Parser b | |
p >>= f = P (\inp -> case parse p inp of | |
[] -> [] | |
[(v,out)] -> parse (f v) out) | |
-- Making choices | |
instance Alternative Parser where | |
-- empty :: Parser a | |
empty = P (\inp -> []) | |
-- (<|>) :: Parser a -> Parser a -> Parser a | |
p <|> q = P (\inp -> case parse p inp of | |
[] -> parse q inp | |
[(v,out)] -> [(v,out)]) | |
-- Derived primitives | |
sat :: (Char -> Bool) -> Parser Char | |
sat p = do x <- item | |
if p x then return x else empty | |
digit :: Parser Char | |
digit = sat isDigit | |
lower :: Parser Char | |
lower = sat isLower | |
upper :: Parser Char | |
upper = sat isUpper | |
letter :: Parser Char | |
letter = sat isAlpha | |
alphanum :: Parser Char | |
alphanum = sat isAlphaNum | |
char :: Char -> Parser Char | |
char x = sat (== x) | |
string :: String -> Parser String | |
string [] = return [] | |
string (x:xs) = do char x | |
string xs | |
return (x:xs) | |
ident :: Parser String | |
ident = do x <- lower | |
xs <- many alphanum | |
return (x:xs) | |
nat :: Parser Int | |
nat = do xs <- some digit | |
return (read xs) | |
int :: Parser Int | |
int = do char '-' | |
n <- nat | |
return (-n) | |
<|> nat | |
-- Handling spacing | |
space :: Parser () | |
space = do many (sat isSpace) | |
return () | |
token :: Parser a -> Parser a | |
token p = do space | |
v <- p | |
space | |
return v | |
identifier :: Parser String | |
identifier = token ident | |
natural :: Parser Int | |
natural = token nat | |
integer :: Parser Int | |
integer = token int | |
symbol :: String -> Parser String | |
symbol xs = token (string xs) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as R from "https://deno.land/x/ramda@v0.27.2/mod.ts"; | |
import { None, Option, Some } from "https://deno.land/x/opt@0.2.2/option.ts"; | |
const main = (): void => { | |
const examples = [ | |
[parse(digit, "123"), Some(["1", "23"])], | |
[parse(digit, "abc"), None], | |
[parse(char("a"), "abc"), Some(["a", "bc"])], | |
[parse(char("a"), "123"), None], | |
[parse(some(digit).map(concat), "123"), Some(["123", ""])], | |
[parse(digit.or(letter), "abc123"), Some(["a", "bc123"])], | |
[parse(some(digit.or(letter)).map(concat), "abc123"), Some(["abc123", ""])], | |
[parse(expr(), "2+3*4"), Some([14, ""])], | |
[parse(expr(), "(2+3)*4"), Some([20, ""])], | |
[parse(expr(), "(2+(7*10)+8)*20"), Some([1600, ""])], | |
[parse(expr(), "2+3*"), Some([5, "*"])], | |
[parse(expr(), "(2+3"), None], | |
]; | |
for (const [actual, expected] of examples) { | |
if (R.equals(actual, expected)) { | |
console.log("PASS", actual); | |
} else { | |
console.log("FAIL expected:", expected, "actual:", actual); | |
} | |
} | |
}; | |
// These have to be functions returning parsers, rather than parser values, so | |
// that they can refer to each other. Otherwise, JavaScript complains that the | |
// ones before can't refer to ones after (including stuff further down, like | |
// `int`). | |
// deno-fmt-ignore | |
const expr = (): Parser<number> => | |
term() .andThen( | |
x => char("+") .andThen( | |
_ => expr() .andThen( | |
y => pure(x + y) ))) | |
.or( term() ); | |
// deno-fmt-ignore | |
const term = (): Parser<number> => | |
factor() .andThen( | |
x => char("*") .andThen( | |
_ => term() .andThen( | |
y => pure(x * y) ))) | |
.or( factor() ); | |
// deno-fmt-ignore | |
const factor = (): Parser<number> => | |
char("(") .andThen( | |
_ => expr() .andThen( | |
x => char(")") .andThen( | |
_ => pure(x) ))) | |
.or( int ); | |
class Parser<A> { | |
run: (input: string) => Option<[A, string]>; | |
constructor(run: (input: string) => Option<[A, string]>) { | |
this.run = run; | |
} | |
map = <B>(f: (x: A) => B): Parser<B> => fmap(f, this); | |
then = <B>(py: Parser<B>): Parser<B> => bind(this, (input) => py); | |
andThen = <B>(k: (x: A) => Parser<B>): Parser<B> => bind(this, k); | |
or = (pr: Parser<A>): Parser<A> => alt(this, pr); | |
} | |
type char = string; | |
const uncons = (s: string): Option<[char, string]> => { | |
const c = s.charAt(0); | |
const cs = s.substring(1); | |
if (c === "") { | |
return None; | |
} else { | |
return Some([c, cs]); | |
} | |
}; | |
const concat = (cs: char[]): string => cs.join(""); | |
const parse = <A>(parser: Parser<A>, input: string): Option<[A, string]> => | |
parser.run(input); | |
const item: Parser<char> = new Parser((input) => uncons(input)); | |
const fmap = <A, B>(f: (x: A) => B, px: Parser<A>): Parser<B> => | |
new Parser( | |
(input) => parse(px, input).map(([x, rest]) => [f(x), rest]), | |
); | |
const pure = <A>(x: A): Parser<A> => | |
new Parser( | |
(input) => Some([x, input]), | |
); | |
const apply = <A, B>(pf: Parser<(x: A) => B>, px: Parser<A>): Parser<B> => | |
new Parser( | |
(input) => parse(pf, input).chain(([f, rest]) => parse(fmap(f, px), rest)), | |
); | |
const liftA2 = <A, B, C>( | |
f: (x: A, y: B) => C, | |
px: Parser<A>, | |
py: Parser<B>, | |
): Parser<C> => apply(fmap((x): (y: B) => C => (y) => f(x, y), px), py); | |
const bind = <A, B>(px: Parser<A>, k: (x: A) => Parser<B>): Parser<B> => | |
new Parser( | |
(input) => parse(px, input).chain(([x, rest]) => parse(k(x), rest)), | |
); | |
const empty: Parser<any> = new Parser((input) => None); | |
const alt = <A>(pl: Parser<A>, pr: Parser<A>): Parser<A> => | |
new Parser( | |
(input) => parse(pl, input).chainNone(() => parse(pr, input)), | |
); | |
const satisfy = (f: (c: char) => Boolean): Parser<char> => | |
item.andThen((c) => f(c) ? pure(c) : empty); | |
const satisfyRegex = (r: RegExp): Parser<char> => satisfy((c) => r.test(c)); | |
const many = <A>(p: Parser<A>): Parser<A[]> => | |
new Parser((input0) => { | |
let input = R.clone(input0); | |
let values = []; | |
while (true) { | |
const result = parse(p, input); | |
if (result.isSome()) { | |
const [value, rest] = result.value; | |
values.push(value); | |
input = rest; | |
} else { | |
return Some([values, input]); | |
} | |
} | |
}); | |
const some = <A>(p: Parser<A>): Parser<A[]> => | |
new Parser((input0) => { | |
let input = R.clone(input0); | |
let values = []; | |
while (true) { | |
const result = parse(p, input); | |
if (result.isSome()) { | |
const [value, rest] = result.value; | |
values.push(value); | |
input = rest; | |
} else { | |
if (values.length == 0) { | |
return None; | |
} else { | |
return Some([values, input]); | |
} | |
} | |
} | |
}); | |
const digit: Parser<char> = satisfyRegex(/[0-9]/); | |
const lower: Parser<char> = satisfyRegex(/[a-z]/); | |
const upper: Parser<char> = satisfyRegex(/[A-Z]/); | |
const letter: Parser<char> = lower.or(upper); | |
const alphanum: Parser<char> = letter.or(digit); | |
const char = (c: char): Parser<char> => { | |
if (c.length !== 1) { | |
throw new Error("char: Input string is not a single character"); | |
} | |
return satisfy((i) => i == c); | |
}; | |
const string = (s: string): Parser<string> => | |
uncons(s).match( | |
([c, cs]) => char(c).then(string(cs)).then(pure(s)), | |
() => empty, | |
); | |
const ident: Parser<string> = lower.andThen((c) => | |
many(alphanum).map((cs) => c + cs) | |
); | |
const nat: Parser<number> = some(digit).map((s): number => | |
parseInt(s.join(""), 10) | |
); | |
const int: Parser<number> = char("-").then(nat).map((n) => -n).or(nat); | |
const space: Parser<null> = many(satisfyRegex(/ /)).map((c) => null); | |
const token = <A>(p: Parser<A>): Parser<A> => | |
space.then(p).andThen((x) => space.then(pure(x))); | |
const identifier: Parser<string> = token(ident); | |
const natural: Parser<number> = token(nat); | |
const integer: Parser<number> = token(int); | |
const symbol = (s: string): Parser<string> => token(string(s)); | |
const optional = <A>(p: Parser<A>): Parser<Option<A>> => | |
p.map(Some).or(pure(None)); | |
main(); |
Author
evanrelf
commented
Jun 2, 2024
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment