Created
February 3, 2022 19:46
-
-
Save jliszka/97a8461ef1a0df66cea1d25a89ed1355 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?hh // strict | |
use namespace \HH\Lib\{C, Regex, Str, Vec, Math}; | |
/* | |
"(3 + 4) - sin(3.14 / 2) * 3 * 2" | |
<exp> ::= | |
<product> "+" <exp> | |
| <product> "-" <exp> | |
| <product> | |
<product> ::= | |
<factor> "*" <product> | |
| <factor> "/" <product> | |
| <factor> | |
<factor> ::= | |
"-" <factor> | |
| "(" <exp> ")" | |
| <fn> "(" <exp> ")" | |
| <num> | |
<fn> ::= <word> | |
*/ | |
<<__EntryPoint>> | |
function go(): void { | |
$input = "(3 + 4) - sin(3.14 / 2) * 3*2"; | |
// Lexer does this | |
$tokens = vec["(", "3", "+", "4", ")", "-", "sin", "(", "3.14", "/", "2", ")", "*", "3", "*", "2"]; | |
// Parser does this | |
$exp = new BinaryOp( | |
new BinaryOp(new Number(3.0), "+", new Number(4.0)), | |
"-", | |
new BinaryOp( | |
new UnaryOp("sin", new BinaryOp(new Number(3.14), "/", new Number(2.0))), | |
"*", | |
new BinaryOp(new Number(3.0), "*", new Number(2.0)), | |
), | |
); | |
// Evaluate it | |
print_r($exp->eval()); | |
print_r("\n"); | |
// Want to get this to work: | |
$exp = parseExp()->parseAll($tokens); | |
print_r($exp); | |
print_r($exp->eval()); | |
print_r("\n"); | |
} | |
// | |
// Data structure to parse into | |
// | |
abstract class Exp { | |
abstract public function eval(): float; | |
} | |
class BinaryOp extends Exp { | |
public function __construct(private Exp $lhs, private string $op, private Exp $rhs): void {} | |
public function eval(): float { | |
$a = $this->lhs->eval(); | |
$b = $this->rhs->eval(); | |
switch ($this->op) { | |
case "+": | |
return $a + $b; | |
case "-": | |
return $a - $b; | |
case "*": | |
return $a * $b; | |
case "/": | |
return $a / $b; | |
} | |
throw new Exception("invalid op ".$this->op); | |
} | |
} | |
class UnaryOp extends Exp { | |
public function __construct(private string $op, private Exp $exp): void {} | |
public function eval(): float { | |
$a = $this->exp->eval(); | |
switch ($this->op) { | |
case "-": | |
return -$a; | |
case "sin": | |
return Math\sin($a); | |
} | |
throw new Exception("invalid op ".$this->op); | |
} | |
} | |
class Number extends Exp { | |
public function __construct(private float $num): void {} | |
public function eval(): float { | |
return $this->num; | |
} | |
} | |
// | |
// Let's parse | |
// | |
abstract class Parser<T> { | |
abstract public function parse(vec<string> $input, int $pos): ?(T, int); | |
public function parseAll(vec<string> $input): T { | |
$res = $this->parse($input, 0); | |
if ($res == null) { | |
throw new Exception("parse error!"); | |
} | |
return $res[0]; | |
} | |
// For later... | |
public function map<T2>((function(T): T2) $f): Parser<T2> { | |
return new MappedParser($this, $f); | |
} | |
public function andThen<T2>(Parser<T2> $p2): Parser<(T, T2)> { | |
return new ParseSeq($this, $p2); | |
} | |
} | |
// | |
// Simple parsers | |
// | |
class ParseAnything extends Parser<string> { | |
public function parse(vec<string> $input, int $pos): ?(string, int) { | |
if ($pos >= C\count($input)) { | |
return null; | |
} | |
return tuple($input[$pos], $pos + 1); | |
} | |
} | |
class ParseNumber extends Parser<float> { | |
public function parse(vec<string> $input, int $pos): ?(float, int) { | |
if ($pos >= C\count($input)) { | |
return null; | |
} | |
if (!Regex\matches($input[$pos], re"/-?[1-9][0-9]*(\.[0-9]+)?/")) { | |
return null; | |
} | |
return tuple((float)$input[$pos], $pos + 1); | |
} | |
} | |
class ParseLiteral extends Parser<string> { | |
public function __construct(private string $token): void {} | |
public function parse(vec<string> $input, int $pos): ?(string, int) { | |
if ($pos >= C\count($input)) { | |
return null; | |
} | |
if ($input[$pos] != $this->token) { | |
return null; | |
} | |
return tuple($input[$pos], $pos + 1); | |
} | |
} | |
// | |
// Combinators! | |
// | |
class ParseOneOf<T> extends Parser<T> { | |
public function __construct(private (function(): vec<Parser<T>>) $parsers): void {} | |
public function parse(vec<string> $input, int $pos): ?(T, int) { | |
$parsers = ($this->parsers)(); | |
foreach ($parsers as $parser) { | |
$res = $parser->parse($input, $pos); | |
if ($res != null) { | |
return $res; | |
} | |
} | |
return null; | |
} | |
} | |
class ParseSeq<T1, T2> extends Parser<(T1, T2)> { | |
public function __construct(private Parser<T1> $pt1, private Parser<T2> $pt2): void {} | |
public function parse(vec<string> $input, int $pos): ?((T1, T2), int) { | |
$res = $this->pt1->parse($input, $pos); | |
if ($res == null) { | |
return null; | |
} | |
list($t1, $pos2) = $res; | |
$res = $this->pt2->parse($input, $pos2); | |
if ($res == null) { | |
return null; | |
} | |
list($t2, $pos3) = $res; | |
return tuple(tuple($t1, $t2), $pos3); | |
} | |
} | |
class MappedParser<T1, T2> extends Parser<T2> { | |
public function __construct(private Parser<T1> $pt, private (function(T1): T2) $f): void {} | |
public function parse(vec<string> $input, int $pos): ?(T2, int) { | |
$res = $this->pt->parse($input, $pos); | |
if ($res == null) { | |
return null; | |
} | |
list($t, $pos2) = $res; | |
return tuple(($this->f)($t), $pos2); | |
} | |
} | |
// | |
// Some helpers... | |
// | |
function lit(string $token): Parser<string> { | |
return new ParseLiteral($token); | |
} | |
function anyWord(): Parser<string> { | |
return new ParseAnything(); | |
} | |
function anyNum(): Parser<float> { | |
return new ParseNumber(); | |
} | |
function oneOf<T>((function(): vec<Parser<T>>) $parsers): Parser<T> { | |
return new ParseOneOf($parsers); | |
} | |
function seq2<T1, T2>(Parser<T1> $p1, Parser<T2> $p2): Parser<(T1, T2)> { | |
return $p1->andThen($p2); | |
} | |
function seq3<T1, T2, T3>(Parser<T1> $p1, Parser<T2> $p2, Parser<T3> $p3): Parser<(T1, T2, T3)> { | |
return $p1->andThen($p2) | |
->andThen($p3) | |
->map($t ==> tuple($t[0][0], $t[0][1], $t[1])); | |
} | |
function seq4<T1, T2, T3, T4>( | |
Parser<T1> $p1, | |
Parser<T2> $p2, | |
Parser<T3> $p3, | |
Parser<T4> $p4, | |
): Parser<(T1, T2, T3, T4)> { | |
return $p1->andThen($p2) | |
->andThen($p3) | |
->andThen($p4) | |
->map($t ==> tuple($t[0][0][0], $t[0][0][1], $t[0][1], $t[1])); | |
} | |
// | |
// The main event... | |
// | |
/* | |
<exp> ::= | |
<product> "+" <exp> | |
| <product> "-" <exp> | |
| <product> | |
*/ | |
function parseExp(): Parser<Exp> { | |
return oneOf( | |
() ==> vec[ | |
seq3(parseProduct(), lit("+"), parseExp())->map($r ==> new BinaryOp($r[0], $r[1], $r[2])), | |
seq3(parseProduct(), lit("-"), parseExp())->map($r ==> new BinaryOp($r[0], $r[1], $r[2])), | |
parseProduct(), | |
], | |
); | |
} | |
/* | |
<product> ::= | |
<factor> "*" <product> | |
| <factor> "/" <product> | |
| <factor> | |
*/ | |
function parseProduct(): Parser<Exp> { | |
$op = oneOf(() ==> vec[lit("*"), lit("/")]); | |
return oneOf( | |
() ==> vec[ | |
seq3(parseFactor(), $op, parseProduct())->map($r ==> new BinaryOp($r[0], $r[1], $r[2])), | |
parseFactor(), | |
], | |
); | |
} | |
/* | |
<factor> ::= | |
"-" <factor> | |
| "(" <exp> ")" | |
| <fn> "(" <exp> ")" | |
| <num> | |
*/ | |
function parseFactor(): Parser<Exp> { | |
return oneOf( | |
() ==> vec[ | |
seq2(lit("-"), parseFactor())->map($r ==> new UnaryOp($r[0], $r[1])), | |
seq3(lit("("), parseExp(), lit(")"))->map($r ==> $r[1]), | |
seq4(anyWord(), lit("("), parseExp(), lit(")"))->map($r ==> new UnaryOp($r[0], $r[2])), | |
anyNum()->map($n ==> new Number($n)), | |
], | |
); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment