Created
April 26, 2016 07:27
-
-
Save sword-jin/50029205199c565923f8e34b4878ebcc to your computer and use it in GitHub Desktop.
Using PHP to make a tiny compiler is a joke.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Making a compiler in PHP is a joke.
/**
 * A tiny compiler front end for parenthesised call expressions such as
 * "(add 2 (subtract 4 2))".
 *
 * Pipeline: tokenizer() turns source text into a flat token list, parser()
 * turns tokens into a nested AST, and transformer() rewrites that AST into a
 * callee/arguments shaped tree.
 */
class Compiler
{
    /**
     * Tokens produced by the most recent tokenizer() call.
     *
     * @var array
     */
    protected $tokens = [];

    /**
     * Cursor shared by the stages: a byte offset into the input while
     * tokenizing, an index into the token list while parsing.
     *
     * @var int
     */
    protected $current;

    /**
     * AST produced by the most recent parser() call.
     *
     * @var array
     */
    protected $ast = [
        'type' => 'Program',
        'body' => [],
    ];

    /**
     * Split source text into tokens.
     *
     * Recognised tokens: parentheses ('paren'), runs of ASCII digits
     * ('number') and runs of lowercase ASCII letters ('name'). Whitespace
     * is skipped.
     *
     * @param string $input Source text.
     *
     * @return array List of ['type' => string, 'value' => string] tokens.
     *
     * @throws Exception When an unrecognised character is encountered.
     */
    public function tokenizer($input)
    {
        // Start from a clean slate so repeated calls do not append to the
        // tokens of a previous input.
        $this->tokens = [];
        $this->current = 0;
        $length = strlen($input);

        while ($this->current < $length) {
            // Grab the single character under the cursor.
            $char = $input[$this->current];

            // Opening or closing parenthesis.
            if ($char === '(' || $char === ')') {
                $this->tokens[] = [
                    'type' => 'paren',
                    'value' => $char,
                ];
                $this->current++;
                continue;
            }

            // Skip whitespace (spaces, newlines, ...).
            if (preg_match('/\s/', $char)) {
                $this->current++;
                continue;
            }

            // A whole run of digits. strspn() measures the run without ever
            // indexing past the end of the input.
            $digits = strspn($input, '0123456789', $this->current);
            if ($digits > 0) {
                $this->tokens[] = [
                    'type' => 'number',
                    'value' => substr($input, $this->current, $digits),
                ];
                $this->current += $digits;
                continue;
            }

            // A whole run of lowercase letters.
            $letters = strspn($input, 'abcdefghijklmnopqrstuvwxyz', $this->current);
            if ($letters > 0) {
                $this->tokens[] = [
                    'type' => 'name',
                    'value' => substr($input, $this->current, $letters),
                ];
                $this->current += $letters;
                continue;
            }

            // Anything else is an error.
            throw new Exception("I dont know what this character is: {$char}");
        }

        return $this->tokens;
    }

    /**
     * Build an AST from a token list.
     *
     * @param array|null $tokens Tokens to parse; when null, the tokens stored
     *                           by the last tokenizer() call are used.
     *
     * @return array ['type' => 'Program', 'body' => list of nodes].
     *
     * @throws Exception When a token cannot start an expression or the token
     *                   list ends inside a call expression.
     */
    public function parser($tokens = null)
    {
        // Only fall back when nothing was passed: an explicitly supplied
        // empty list must not be replaced by stale stored tokens.
        if ($tokens === null) {
            $tokens = $this->tokens;
        }

        $this->current = 0;
        // Reset the body so parsing twice does not duplicate nodes.
        $this->ast['body'] = [];

        $count = count($tokens);
        while ($this->current < $count) {
            $this->ast['body'][] = $this->walk($tokens);
        }

        return $this->ast;
    }

    /**
     * Rewrite a parser() AST into a callee/arguments shaped tree.
     *
     * Each 'CallExpression' node becomes
     * ['type' => 'CallExpression', 'callee' => ['type' => 'Identifier', 'name' => ...], 'arguments' => [...]],
     * and a call that sits directly in the program body is wrapped in an
     * 'ExpressionStatement'. 'NumberLiteral' nodes are copied unchanged.
     *
     * @param array|null $ast AST to transform; when null or empty, the AST
     *                        stored by the last parser() call is used.
     *
     * @return array The transformed ['type' => 'Program', 'body' => [...]] tree.
     *
     * @throws Exception When a node has a type other than the two above.
     */
    public function transformer($ast = null)
    {
        $ast = $ast ?: $this->ast;

        $newAst = [
            'type' => 'Program',
            'body' => [],
        ];

        foreach ($ast['body'] as $node) {
            $transformed = $this->transformNode($node);

            // Top-level calls are statements; nested calls stay expressions.
            if ($node['type'] === 'CallExpression') {
                $transformed = [
                    'type' => 'ExpressionStatement',
                    'expression' => $transformed,
                ];
            }

            $newAst['body'][] = $transformed;
        }

        return $newAst;
    }

    /**
     * Recursively convert one parser node into its transformed counterpart.
     *
     * @param array $node A 'NumberLiteral' or 'CallExpression' node.
     *
     * @return array The transformed node.
     *
     * @throws Exception When the node type is not recognised.
     */
    protected function transformNode($node)
    {
        if ($node['type'] === 'NumberLiteral') {
            return [
                'type' => 'NumberLiteral',
                'value' => $node['value'],
            ];
        }

        if ($node['type'] === 'CallExpression') {
            $arguments = [];
            foreach ($node['params'] as $param) {
                $arguments[] = $this->transformNode($param);
            }

            return [
                'type' => 'CallExpression',
                'callee' => [
                    'type' => 'Identifier',
                    'name' => $node['name'],
                ],
                'arguments' => $arguments,
            ];
        }

        throw new Exception("I can't transform this node type: {$node['type']}");
    }

    /**
     * Parse one expression starting at the current cursor and advance the
     * cursor past it.
     *
     * @param array $tokens Full token list being parsed.
     *
     * @return array A 'NumberLiteral' or 'CallExpression' node.
     *
     * @throws Exception When the token cannot start an expression or the
     *                   token list ends before the expression is complete.
     */
    protected function walk($tokens)
    {
        $token = $this->tokenAt($tokens, $this->current);

        // A number becomes a leaf node.
        if ($token['type'] === 'number') {
            $this->current++;

            return [
                'type' => 'NumberLiteral',
                'value' => $token['value'],
            ];
        }

        // An opening parenthesis starts a call expression.
        if ($token['type'] === 'paren' && $token['value'] === '(') {
            // Step over the parenthesis itself; the AST does not record it.
            $token = $this->tokenAt($tokens, ++$this->current);

            // The token right after '(' is taken as the function name
            // (e.g. add or subtract).
            $node = [
                'type' => 'CallExpression',
                'name' => $token['value'],
                'params' => [],
            ];

            // Step over the function name.
            $token = $this->tokenAt($tokens, ++$this->current);

            // Collect parameters until the matching closing parenthesis.
            while ($token['type'] !== 'paren' || $token['value'] !== ')') {
                $node['params'][] = $this->walk($tokens);
                $token = $this->tokenAt($tokens, $this->current);
            }

            // Step over the closing parenthesis.
            $this->current++;

            return $node;
        }

        // Name the offending token; interpolating the array itself would only
        // print "Array" and raise a conversion warning.
        throw new Exception("I can't parse this token: {$token['type']} \"{$token['value']}\"");
    }

    /**
     * Fetch the token at an index, failing loudly when the list has ended.
     *
     * @param array $tokens Full token list being parsed.
     * @param int   $index  Position to read.
     *
     * @return array The token at that position.
     *
     * @throws Exception When the index is past the end of the token list.
     */
    protected function tokenAt($tokens, $index)
    {
        if (!isset($tokens[$index])) {
            throw new Exception('Unexpected end of input: missing closing parenthesis');
        }

        return $tokens[$index];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment