evanrelf/parser-combinators.hs

## parser-combinators.hs
-- https://www.youtube.com/watch?v=dDtZLm7HIJs

{-# LANGUAGE BlockArguments #-}

module Main (main) where

import Control.Applicative
import Control.Monad (when)
import Data.Char
import Data.Foldable (for_)
import System.Exit (die)

main :: IO ()
main = do
  let examples =
        [ parse digit "123"                        ==> [('1', "23")]
        , parse digit "abc"                        ==> []
        , parse (char 'a') "abc"                   ==> [('a', "bc")]
        , parse (char 'a') "123"                   ==> []
        , parse (some digit) "123"                 ==> [("123", "")]
        , parse (digit <|> letter) "abc123"        ==> [('a', "bc123")]
        , parse (some (digit <|> letter)) "abc123" ==> [("abc123", "")]
        , parse expr "2+3*4"                       ==> [(14, "")]
        , parse expr "(2+3)*4"                     ==> [(20,"")]
        , parse expr "(2+(7*10)+8)*20"             ==> [(1600, "")]
        , parse expr "2+3*"                        ==> [(5, "*")]
        , parse expr "(2+3"                        ==> []
        ]

  for_ examples \(Example actual expected) -> do
    if actual == expected
      then putStrLn $ "PASS " <> show actual
      else putStrLn $ "FAIL expected: " <> show expected <> ", actual: " <> show actual

expr :: Parser Int
expr =
  do
    x <- term
    char '+'
    y <- expr
    pure (x + y)
  <|> term

term :: Parser Int
term =
  do
    x <- factor
    char '*'
    y <- term
    pure (x * y)
  <|> factor

factor :: Parser Int
factor =
  do
    char '('
    x <- expr
    char ')'
    pure x
  <|> int

data Example = forall a. (Eq a, Show a) => Example a a

(==>) :: (Eq a, Show a) => a -> a -> Example
(==>) = Example

--------------------------------------------------------------------------------
-- https://www.cs.nott.ac.uk/~pszgmh/Parsing.hs
--------------------------------------------------------------------------------

-- Functional parsing library from chapter 13 of Programming in Haskell,
-- Graham Hutton, Cambridge University Press, 2016.

-- Basic definitions

newtype Parser a = P (String -> [(a,String)])

parse :: Parser a -> String -> [(a,String)]
parse (P p) inp = p inp

item :: Parser Char
item = P (\inp -> case inp of
                     []     -> []
                     (x:xs) -> [(x,xs)])

-- Sequencing parsers

instance Functor Parser where
   -- fmap :: (a -> b) -> Parser a -> Parser b
   fmap g p = P (\inp -> case parse p inp of
                            []        -> []
                            [(v,out)] -> [(g v, out)])

instance Applicative Parser where
   -- pure :: a -> Parser a
   pure v = P (\inp -> [(v,inp)])

   -- <*> :: Parser (a -> b) -> Parser a -> Parser b
   pg <*> px = P (\inp -> case parse pg inp of
                             []        -> []
                             [(g,out)] -> parse (fmap g px) out)

instance Monad Parser where
   -- (>>=) :: Parser a -> (a -> Parser b) -> Parser b
   p >>= f = P (\inp -> case parse p inp of
                           []        -> []
                           [(v,out)] -> parse (f v) out)

-- Making choices

instance Alternative Parser where
   -- empty :: Parser a
   empty = P (\inp -> [])

   -- (<|>) :: Parser a -> Parser a -> Parser a
   p <|> q = P (\inp -> case parse p inp of
                           []        -> parse q inp
                           [(v,out)] -> [(v,out)])

-- Derived primitives

sat :: (Char -> Bool) -> Parser Char
sat p = do x <- item
           if p x then return x else empty

digit :: Parser Char
digit = sat isDigit

lower :: Parser Char
lower = sat isLower

upper :: Parser Char
upper = sat isUpper

letter :: Parser Char
letter = sat isAlpha

alphanum :: Parser Char
alphanum = sat isAlphaNum

char :: Char -> Parser Char
char x = sat (== x)

string :: String -> Parser String
string []     = return []
string (x:xs) = do char x
                   string xs
                   return (x:xs)

ident :: Parser String
ident = do x  <- lower
           xs <- many alphanum
           return (x:xs)

nat :: Parser Int
nat = do xs <- some digit
         return (read xs)

int :: Parser Int
int = do char '-'
         n <- nat
         return (-n)
       <|> nat

-- Handling spacing

space :: Parser ()
space = do many (sat isSpace)
           return ()

token :: Parser a -> Parser a
token p = do space
             v <- p
             space
             return v

identifier :: Parser String
identifier = token ident

natural :: Parser Int
natural = token nat

integer :: Parser Int
integer = token int

symbol :: String -> Parser String
symbol xs = token (string xs)

## parser-combinators.ts
import * as R from "https://deno.land/x/ramda@v0.27.2/mod.ts";
import { None, Option, Some } from "https://deno.land/x/opt@0.2.2/option.ts";

const main = (): void => {
  const examples = [
    [parse(digit, "123"), Some(["1", "23"])],
    [parse(digit, "abc"), None],
    [parse(char("a"), "abc"), Some(["a", "bc"])],
    [parse(char("a"), "123"), None],
    [parse(some(digit).map(concat), "123"), Some(["123", ""])],
    [parse(digit.or(letter), "abc123"), Some(["a", "bc123"])],
    [parse(some(digit.or(letter)).map(concat), "abc123"), Some(["abc123", ""])],
    [parse(expr(), "2+3*4"), Some([14, ""])],
    [parse(expr(), "(2+3)*4"), Some([20, ""])],
    [parse(expr(), "(2+(7*10)+8)*20"), Some([1600, ""])],
    [parse(expr(), "2+3*"), Some([5, "*"])],
    [parse(expr(), "(2+3"), None],
  ];

  for (const [actual, expected] of examples) {
    if (R.equals(actual, expected)) {
      console.log("PASS", actual);
    } else {
      console.log("FAIL expected:", expected, "actual:", actual);
    }
  }
};

// These have to be functions returning parsers, rather than parser values, so
// that they can refer to each other. Otherwise, JavaScript complains that the
// ones before can't refer to ones after (including stuff further down, like
// `int`).

// deno-fmt-ignore
const expr = (): Parser<number> =>
          term()         .andThen(
  x =>    char("+")      .andThen(
  _ =>    expr()         .andThen(
  y =>    pure(x + y)    )))
  .or(    term()         );

// deno-fmt-ignore
const term = (): Parser<number> =>
          factor()       .andThen(
  x =>    char("*")      .andThen(
  _ =>    term()         .andThen(
  y =>    pure(x * y)    )))
  .or(    factor()       );

// deno-fmt-ignore
const factor = (): Parser<number> =>
          char("(")    .andThen(
  _ =>    expr()       .andThen(
  x =>    char(")")    .andThen(
  _ =>    pure(x)      )))
  .or(    int          );

class Parser<A> {
  run: (input: string) => Option<[A, string]>;

  constructor(run: (input: string) => Option<[A, string]>) {
    this.run = run;
  }

  map = <B>(f: (x: A) => B): Parser<B> => fmap(f, this);

  then = <B>(py: Parser<B>): Parser<B> => bind(this, (input) => py);

  andThen = <B>(k: (x: A) => Parser<B>): Parser<B> => bind(this, k);

  or = (pr: Parser<A>): Parser<A> => alt(this, pr);
}

type char = string;

const uncons = (s: string): Option<[char, string]> => {
  const c = s.charAt(0);
  const cs = s.substring(1);
  if (c === "") {
    return None;
  } else {
    return Some([c, cs]);
  }
};

const concat = (cs: char[]): string => cs.join("");

const parse = <A>(parser: Parser<A>, input: string): Option<[A, string]> =>
  parser.run(input);

const item: Parser<char> = new Parser((input) => uncons(input));

const fmap = <A, B>(f: (x: A) => B, px: Parser<A>): Parser<B> =>
  new Parser(
    (input) => parse(px, input).map(([x, rest]) => [f(x), rest]),
  );

const pure = <A>(x: A): Parser<A> =>
  new Parser(
    (input) => Some([x, input]),
  );

const apply = <A, B>(pf: Parser<(x: A) => B>, px: Parser<A>): Parser<B> =>
  new Parser(
    (input) => parse(pf, input).chain(([f, rest]) => parse(fmap(f, px), rest)),
  );

const liftA2 = <A, B, C>(
  f: (x: A, y: B) => C,
  px: Parser<A>,
  py: Parser<B>,
): Parser<C> => apply(fmap((x): (y: B) => C => (y) => f(x, y), px), py);

const bind = <A, B>(px: Parser<A>, k: (x: A) => Parser<B>): Parser<B> =>
  new Parser(
    (input) => parse(px, input).chain(([x, rest]) => parse(k(x), rest)),
  );

const empty: Parser<any> = new Parser((input) => None);

const alt = <A>(pl: Parser<A>, pr: Parser<A>): Parser<A> =>
  new Parser(
    (input) => parse(pl, input).chainNone(() => parse(pr, input)),
  );

const satisfy = (f: (c: char) => Boolean): Parser<char> =>
  item.andThen((c) => f(c) ? pure(c) : empty);

const satisfyRegex = (r: RegExp): Parser<char> => satisfy((c) => r.test(c));

const many = <A>(p: Parser<A>): Parser<A[]> =>
  new Parser((input0) => {
    let input = R.clone(input0);
    let values = [];
    while (true) {
      const result = parse(p, input);
      if (result.isSome()) {
        const [value, rest] = result.value;
        values.push(value);
        input = rest;
      } else {
        return Some([values, input]);
      }
    }
  });

const some = <A>(p: Parser<A>): Parser<A[]> =>
  new Parser((input0) => {
    let input = R.clone(input0);
    let values = [];
    while (true) {
      const result = parse(p, input);
      if (result.isSome()) {
        const [value, rest] = result.value;
        values.push(value);
        input = rest;
      } else {
        if (values.length == 0) {
          return None;
        } else {
          return Some([values, input]);
        }
      }
    }
  });

const digit: Parser<char> = satisfyRegex(/[0-9]/);

const lower: Parser<char> = satisfyRegex(/[a-z]/);

const upper: Parser<char> = satisfyRegex(/[A-Z]/);

const letter: Parser<char> = lower.or(upper);

const alphanum: Parser<char> = letter.or(digit);

const char = (c: char): Parser<char> => {
  if (c.length !== 1) {
    throw new Error("char: Input string is not a single character");
  }
  return satisfy((i) => i == c);
};

const string = (s: string): Parser<string> =>
  uncons(s).match(
    ([c, cs]) => char(c).then(string(cs)).then(pure(s)),
    () => empty,
  );

const ident: Parser<string> = lower.andThen((c) =>
  many(alphanum).map((cs) => c + cs)
);

const nat: Parser<number> = some(digit).map((s): number =>
  parseInt(s.join(""), 10)
);

const int: Parser<number> = char("-").then(nat).map((n) => -n).or(nat);

const space: Parser<null> = many(satisfyRegex(/ /)).map((c) => null);

const token = <A>(p: Parser<A>): Parser<A> =>
  space.then(p).andThen((x) => space.then(pure(x)));

const identifier: Parser<string> = token(ident);

const natural: Parser<number> = token(nat);

const integer: Parser<number> = token(int);

const symbol = (s: string): Parser<string> => token(string(s));

const optional = <A>(p: Parser<A>): Parser<Option<A>> =>
  p.map(Some).or(pure(None));

main();
	-- https://www.youtube.com/watch?v=dDtZLm7HIJs

	{-# LANGUAGE BlockArguments #-}

	module Main (main) where

	import Control.Applicative
	import Control.Monad (when)
	import Data.Char
	import Data.Foldable (for_)
	import System.Exit (die)

	main :: IO ()
	main = do
	let examples =
	[ parse digit "123" ==> [('1', "23")]
	, parse digit "abc" ==> []
	, parse (char 'a') "abc" ==> [('a', "bc")]
	, parse (char 'a') "123" ==> []
	, parse (some digit) "123" ==> [("123", "")]
	, parse (digit <\|> letter) "abc123" ==> [('a', "bc123")]
	, parse (some (digit <\|> letter)) "abc123" ==> [("abc123", "")]
	, parse expr "2+3*4" ==> [(14, "")]
	, parse expr "(2+3)*4" ==> [(20,"")]
	, parse expr "(2+(710)+8)20" ==> [(1600, "")]
	, parse expr "2+3" ==> [(5, "")]
	, parse expr "(2+3" ==> []
	]

	for_ examples \(Example actual expected) -> do
	if actual == expected
	then putStrLn $ "PASS " <> show actual
	else putStrLn $ "FAIL expected: " <> show expected <> ", actual: " <> show actual

	expr :: Parser Int
	expr =
	do
	x <- term
	char '+'
	y <- expr
	pure (x + y)
	<\|> term

	term :: Parser Int
	term =
	do
	x <- factor
	char '*'
	y <- term
	pure (x * y)
	<\|> factor

	factor :: Parser Int
	factor =
	do
	char '('
	x <- expr
	char ')'
	pure x
	<\|> int

	data Example = forall a. (Eq a, Show a) => Example a a

	(==>) :: (Eq a, Show a) => a -> a -> Example
	(==>) = Example

	--------------------------------------------------------------------------------
	-- https://www.cs.nott.ac.uk/~pszgmh/Parsing.hs
	--------------------------------------------------------------------------------

	-- Functional parsing library from chapter 13 of Programming in Haskell,
	-- Graham Hutton, Cambridge University Press, 2016.

	-- Basic definitions

	newtype Parser a = P (String -> [(a,String)])

	parse :: Parser a -> String -> [(a,String)]
	parse (P p) inp = p inp

	item :: Parser Char
	item = P (\inp -> case inp of
	[] -> []
	(x:xs) -> [(x,xs)])

	-- Sequencing parsers

	instance Functor Parser where
	-- fmap :: (a -> b) -> Parser a -> Parser b
	fmap g p = P (\inp -> case parse p inp of
	[] -> []
	[(v,out)] -> [(g v, out)])

	instance Applicative Parser where
	-- pure :: a -> Parser a
	pure v = P (\inp -> [(v,inp)])

	-- <*> :: Parser (a -> b) -> Parser a -> Parser b
	pg <*> px = P (\inp -> case parse pg inp of
	[] -> []
	[(g,out)] -> parse (fmap g px) out)

	instance Monad Parser where
	-- (>>=) :: Parser a -> (a -> Parser b) -> Parser b
	p >>= f = P (\inp -> case parse p inp of
	[] -> []
	[(v,out)] -> parse (f v) out)

	-- Making choices

	instance Alternative Parser where
	-- empty :: Parser a
	empty = P (\inp -> [])

	-- (<\|>) :: Parser a -> Parser a -> Parser a
	p <\|> q = P (\inp -> case parse p inp of
	[] -> parse q inp
	[(v,out)] -> [(v,out)])

	-- Derived primitives

	sat :: (Char -> Bool) -> Parser Char
	sat p = do x <- item
	if p x then return x else empty

	digit :: Parser Char
	digit = sat isDigit

	lower :: Parser Char
	lower = sat isLower

	upper :: Parser Char
	upper = sat isUpper

	letter :: Parser Char
	letter = sat isAlpha

	alphanum :: Parser Char
	alphanum = sat isAlphaNum

	char :: Char -> Parser Char
	char x = sat (== x)

	string :: String -> Parser String
	string [] = return []
	string (x:xs) = do char x
	string xs
	return (x:xs)

	ident :: Parser String
	ident = do x <- lower
	xs <- many alphanum
	return (x:xs)

	nat :: Parser Int
	nat = do xs <- some digit
	return (read xs)

	int :: Parser Int
	int = do char '-'
	n <- nat
	return (-n)
	<\|> nat

	-- Handling spacing

	space :: Parser ()
	space = do many (sat isSpace)
	return ()

	token :: Parser a -> Parser a
	token p = do space
	v <- p
	space
	return v

	identifier :: Parser String
	identifier = token ident

	natural :: Parser Int
	natural = token nat

	integer :: Parser Int
	integer = token int

	symbol :: String -> Parser String
	symbol xs = token (string xs)
	import * as R from "https://deno.land/x/ramda@v0.27.2/mod.ts";
	import { None, Option, Some } from "https://deno.land/x/opt@0.2.2/option.ts";

	const main = (): void => {
	const examples = [
	[parse(digit, "123"), Some(["1", "23"])],
	[parse(digit, "abc"), None],
	[parse(char("a"), "abc"), Some(["a", "bc"])],
	[parse(char("a"), "123"), None],
	[parse(some(digit).map(concat), "123"), Some(["123", ""])],
	[parse(digit.or(letter), "abc123"), Some(["a", "bc123"])],
	[parse(some(digit.or(letter)).map(concat), "abc123"), Some(["abc123", ""])],
	[parse(expr(), "2+3*4"), Some([14, ""])],
	[parse(expr(), "(2+3)*4"), Some([20, ""])],
	[parse(expr(), "(2+(710)+8)20"), Some([1600, ""])],
	[parse(expr(), "2+3"), Some([5, ""])],
	[parse(expr(), "(2+3"), None],
	];

	for (const [actual, expected] of examples) {
	if (R.equals(actual, expected)) {
	console.log("PASS", actual);
	} else {
	console.log("FAIL expected:", expected, "actual:", actual);
	}
	}
	};

	// These have to be functions returning parsers, rather than parser values, so
	// that they can refer to each other. Otherwise, JavaScript complains that the
	// ones before can't refer to ones after (including stuff further down, like
	// `int`).

	// deno-fmt-ignore
	const expr = (): Parser<number> =>
	term() .andThen(
	x => char("+") .andThen(
	_ => expr() .andThen(
	y => pure(x + y) )))
	.or( term() );

	// deno-fmt-ignore
	const term = (): Parser<number> =>
	factor() .andThen(
	x => char("*") .andThen(
	_ => term() .andThen(
	y => pure(x * y) )))
	.or( factor() );

	// deno-fmt-ignore
	const factor = (): Parser<number> =>
	char("(") .andThen(
	_ => expr() .andThen(
	x => char(")") .andThen(
	_ => pure(x) )))
	.or( int );

	class Parser<A> {
	run: (input: string) => Option<[A, string]>;

	constructor(run: (input: string) => Option<[A, string]>) {
	this.run = run;
	}

	map = <B>(f: (x: A) => B): Parser<B> => fmap(f, this);

	then = <B>(py: Parser<B>): Parser<B> => bind(this, (input) => py);

	andThen = <B>(k: (x: A) => Parser<B>): Parser<B> => bind(this, k);

	or = (pr: Parser<A>): Parser<A> => alt(this, pr);
	}

	type char = string;

	const uncons = (s: string): Option<[char, string]> => {
	const c = s.charAt(0);
	const cs = s.substring(1);
	if (c === "") {
	return None;
	} else {
	return Some([c, cs]);
	}
	};

	const concat = (cs: char[]): string => cs.join("");

	const parse = <A>(parser: Parser<A>, input: string): Option<[A, string]> =>
	parser.run(input);

	const item: Parser<char> = new Parser((input) => uncons(input));

	const fmap = <A, B>(f: (x: A) => B, px: Parser<A>): Parser<B> =>
	new Parser(
	(input) => parse(px, input).map(([x, rest]) => [f(x), rest]),
	);

	const pure = <A>(x: A): Parser<A> =>
	new Parser(
	(input) => Some([x, input]),
	);

	const apply = <A, B>(pf: Parser<(x: A) => B>, px: Parser<A>): Parser<B> =>
	new Parser(
	(input) => parse(pf, input).chain(([f, rest]) => parse(fmap(f, px), rest)),
	);

	const liftA2 = <A, B, C>(
	f: (x: A, y: B) => C,
	px: Parser<A>,
	py: Parser<B>,
	): Parser<C> => apply(fmap((x): (y: B) => C => (y) => f(x, y), px), py);

	const bind = <A, B>(px: Parser<A>, k: (x: A) => Parser<B>): Parser<B> =>
	new Parser(
	(input) => parse(px, input).chain(([x, rest]) => parse(k(x), rest)),
	);

	const empty: Parser<any> = new Parser((input) => None);

	const alt = <A>(pl: Parser<A>, pr: Parser<A>): Parser<A> =>
	new Parser(
	(input) => parse(pl, input).chainNone(() => parse(pr, input)),
	);

	const satisfy = (f: (c: char) => Boolean): Parser<char> =>
	item.andThen((c) => f(c) ? pure(c) : empty);

	const satisfyRegex = (r: RegExp): Parser<char> => satisfy((c) => r.test(c));

	const many = <A>(p: Parser<A>): Parser<A[]> =>
	new Parser((input0) => {
	let input = R.clone(input0);
	let values = [];
	while (true) {
	const result = parse(p, input);
	if (result.isSome()) {
	const [value, rest] = result.value;
	values.push(value);
	input = rest;
	} else {
	return Some([values, input]);
	}
	}
	});

	const some = <A>(p: Parser<A>): Parser<A[]> =>
	new Parser((input0) => {
	let input = R.clone(input0);
	let values = [];
	while (true) {
	const result = parse(p, input);
	if (result.isSome()) {
	const [value, rest] = result.value;
	values.push(value);
	input = rest;
	} else {
	if (values.length == 0) {
	return None;
	} else {
	return Some([values, input]);
	}
	}
	}
	});

	const digit: Parser<char> = satisfyRegex(/[0-9]/);

	const lower: Parser<char> = satisfyRegex(/[a-z]/);

	const upper: Parser<char> = satisfyRegex(/[A-Z]/);

	const letter: Parser<char> = lower.or(upper);

	const alphanum: Parser<char> = letter.or(digit);

	const char = (c: char): Parser<char> => {
	if (c.length !== 1) {
	throw new Error("char: Input string is not a single character");
	}
	return satisfy((i) => i == c);
	};

	const string = (s: string): Parser<string> =>
	uncons(s).match(
	([c, cs]) => char(c).then(string(cs)).then(pure(s)),
	() => empty,
	);

	const ident: Parser<string> = lower.andThen((c) =>
	many(alphanum).map((cs) => c + cs)
	);

	const nat: Parser<number> = some(digit).map((s): number =>
	parseInt(s.join(""), 10)
	);

	const int: Parser<number> = char("-").then(nat).map((n) => -n).or(nat);

	const space: Parser<null> = many(satisfyRegex(/ /)).map((c) => null);

	const token = <A>(p: Parser<A>): Parser<A> =>
	space.then(p).andThen((x) => space.then(pure(x)));

	const identifier: Parser<string> = token(ident);

	const natural: Parser<number> = token(nat);

	const integer: Parser<number> = token(int);

	const symbol = (s: string): Parser<string> => token(string(s));

	const optional = <A>(p: Parser<A>): Parser<Option<A>> =>
	p.map(Some).or(pure(None));

	main();