Skip to content

Instantly share code, notes, and snippets.

@picasso250
Last active December 18, 2015 00:19
Show Gist options
  • Save picasso250/5695893 to your computer and use it in GitHub Desktop.
Save picasso250/5695893 to your computer and use it in GitHub Desktop.
<?php
// Show every diagnostic while running this demo script.
// ini_set() takes its value as a string.
ini_set('display_errors', '1');
// -1 enables all error levels on every PHP version. E_ALL already contains
// E_STRICT since PHP 5.4, and the E_STRICT constant itself is deprecated as of
// PHP 8.4, so referencing it would emit a deprecation notice.
error_reporting(-1);
/**
 * A small recursive-descent JSON decoder.
 *
 * Objects are returned as stdClass instances, arrays as PHP lists, strings as
 * UTF-8 byte strings, and numbers as int or float.
 *
 * Every private _consume_* method takes the not-yet-parsed input and returns
 * array(value, remaining input).
 *
 * Known limitations:
 *  - Nested arrays/objects are parsed recursively, so extremely deep input
 *    can exhaust the call stack.
 *  - The remaining input is copied with substr() after every token and string
 *    bodies are scanned twice (once to find the end, once to unescape), so
 *    decoding large documents is slow.
 *  - Anything that follows the first complete value is ignored by decode().
 */
class JsonDecoder
{
    /**
     * JSON single-character escapes mapped to the byte they stand for.
     * "\x08" is spelled out because PHP has no "\b" escape sequence.
     */
    private static $_escapes = array(
        'n'  => "\n",
        '\\' => '\\',
        '/'  => '/',
        'b'  => "\x08",
        'f'  => "\f",
        'r'  => "\r",
        't'  => "\t",
        '"'  => '"',
    );

    /** JSON text that decode() works on when called without an argument. */
    private $_str;

    /**
     * @param string|null $str JSON text to decode later with decode().
     */
    public function __construct($str = null)
    {
        $this->_str = $str;
    }

    /**
     * Decode a JSON document.
     *
     * @param string|null $str JSON text; when null, the text given to the
     *                         constructor (or to a previous call) is used.
     * @return mixed The decoded value.
     * @throws Exception When the text is empty or cannot be parsed.
     */
    public function decode($str = null)
    {
        if ($str !== null) {
            $this->_str = $str;
        }
        list($val, $str_left) = $this->_consume_val((string) $this->_str);
        return $val;
    }

    /**
     * Strip leading JSON whitespace (space, tab, line feed, carriage return).
     */
    private function _skip_ws($str)
    {
        return ltrim($str, " \t\n\r");
    }

    /**
     * Parse one value of any type from the front of $str.
     *
     * Whitespace before and after the value is skipped, so the returned
     * remainder starts at the next structural character (or is empty).
     *
     * @throws Exception When there is no value to read.
     */
    private function _consume_val($str)
    {
        $str = $this->_skip_ws($str);
        if ($str === '') {
            throw new Exception('unexpected end of input, expected a value', 1);
        }
        $first_char = $str[0];
        if ($first_char === '"') {
            list($val, $str_left) = $this->_consume_str((string) substr($str, 1));
        } elseif ($first_char === '[') {
            list($val, $str_left) = $this->_consume_arr((string) substr($str, 1));
        } elseif ($first_char === '{') {
            list($val, $str_left) = $this->_consume_obj((string) substr($str, 1));
        } else {
            list($val, $str_left) = $this->_consume_lit($str);
        }
        return array($val, $this->_skip_ws($str_left));
    }

    /**
     * Parse a bare literal (true, false, null or a number).
     *
     * The token is the longest prefix made of characters that can occur in
     * those literals; '.' and 'E' are part of the set so that fractions and
     * upper-case exponents are read as one token.
     *
     * @throws Exception When $str does not start with a literal.
     */
    private function _consume_lit($str)
    {
        $len = strspn($str, 'truefalsn0123456789.eE+-');
        if ($len === 0) {
            throw new Exception("can't parse literal '" . substr($str, 0, 1) . "'", 1);
        }
        return array(
            $this->_lit_callback(substr($str, 0, $len)),
            (string) substr($str, $len),
        );
    }

    /**
     * Parse a string whose opening quote has already been removed.
     *
     * @throws Exception When the closing quote is missing.
     */
    private function _consume_str($str)
    {
        $i = 0;
        $strlen = strlen($str);
        while ($i < $strlen) {
            $char = $str[$i];
            if ($char === '\\') {
                // Skip the escaped character so that \" does not end the string.
                $i += 2;
                continue;
            }
            if ($char === '"') {
                return array(
                    $this->_str_callback(substr($str, 0, $i)),
                    (string) substr($str, $i + 1),
                );
            }
            $i += 1;
        }
        throw new Exception("string has no end: $str", 1);
    }

    /**
     * Parse an array whose opening '[' has already been removed.
     *
     * @throws Exception When an element separator or the closing ']' is missing.
     */
    private function _consume_arr($str)
    {
        $arr = array();
        $str_left = $this->_skip_ws($str);
        if ($str_left !== '' && $str_left[0] === ']') {
            return array($arr, (string) substr($str_left, 1));
        }
        while (true) {
            list($val, $str_left) = $this->_consume_val($str_left);
            $arr[] = $val;
            if ($str_left === '') {
                throw new Exception('array has no end', 1);
            }
            $sep = $str_left[0];
            $str_left = (string) substr($str_left, 1);
            if ($sep === ']') {
                return array($arr, $str_left);
            }
            if ($sep !== ',') {
                throw new Exception("expected ',' or ']' in array but found '$sep'", 1);
            }
        }
    }

    /**
     * Parse an object whose opening '{' has already been removed.
     *
     * @throws Exception When a member separator or the closing '}' is missing.
     */
    private function _consume_obj($str)
    {
        $obj = new stdClass;
        $str_left = $this->_skip_ws($str);
        if ($str_left !== '' && $str_left[0] === '}') {
            return array($obj, (string) substr($str_left, 1));
        }
        while (true) {
            list($key, $value, $str_left) = $this->_consume_pair($str_left);
            $obj->{$key} = $value;
            if ($str_left === '') {
                throw new Exception('object has no end', 1);
            }
            $sep = $str_left[0];
            $str_left = (string) substr($str_left, 1);
            if ($sep === '}') {
                return array($obj, $str_left);
            }
            if ($sep !== ',') {
                throw new Exception("expected ',' or '}' in object but found '$sep'", 1);
            }
        }
    }

    /**
     * Parse one `"key": value` member of an object.
     *
     * @return array array(key, value, remaining input)
     * @throws Exception When the key is not a string or the ':' is missing.
     */
    private function _consume_pair($str)
    {
        $str = $this->_skip_ws($str);
        if ($str === '' || $str[0] !== '"') {
            throw new Exception("object key must be a string: $str", 1);
        }
        list($key, $str_left) = $this->_consume_str((string) substr($str, 1));
        $str_left = $this->_skip_ws($str_left);
        if ($str_left === '' || $str_left[0] !== ':') {
            throw new Exception("expected ':' after object key \"$key\"", 1);
        }
        list($value, $str_left) = $this->_consume_val((string) substr($str_left, 1));
        return array($key, $value, $str_left);
    }

    /**
     * Read the four hex digits of a \uXXXX escape starting at $offset.
     *
     * @return int The UTF-16 code unit.
     * @throws Exception When fewer than four hex digits are present.
     */
    private function _read_hex4($str, $offset)
    {
        $hex = (string) substr($str, $offset, 4);
        if (!preg_match('/^[0-9a-fA-F]{4}\z/', $hex)) {
            throw new Exception('invalid \\u escape: \\u' . $hex);
        }
        return hexdec($hex);
    }

    /**
     * Encode a Unicode code point as a UTF-8 byte sequence.
     */
    private function _utf8_chr($code)
    {
        if ($code < 0x80) {
            return chr($code);
        }
        if ($code < 0x800) {
            return chr(0xC0 | ($code >> 6))
                . chr(0x80 | ($code & 0x3F));
        }
        if ($code < 0x10000) {
            return chr(0xE0 | ($code >> 12))
                . chr(0x80 | (($code >> 6) & 0x3F))
                . chr(0x80 | ($code & 0x3F));
        }
        return chr(0xF0 | ($code >> 18))
            . chr(0x80 | (($code >> 12) & 0x3F))
            . chr(0x80 | (($code >> 6) & 0x3F))
            . chr(0x80 | ($code & 0x3F));
    }

    /**
     * Turn the raw body of a JSON string (without the surrounding quotes)
     * into its value by resolving backslash escapes.
     *
     * \uXXXX escapes are emitted as UTF-8; a high/low surrogate pair is
     * combined into a single code point.
     *
     * @param string $str Raw string body.
     * @return string The unescaped value.
     * @throws Exception On an unknown escape, a malformed \u escape or an
     *                   unpaired surrogate.
     */
    public function _str_callback($str)
    {
        $ret = '';
        $i = 0;
        $strlen = strlen($str);
        while ($i < $strlen) {
            $char = $str[$i];
            if ($char !== '\\') {
                $ret .= $char;
                $i += 1;
                continue;
            }
            if ($i + 1 >= $strlen) {
                // A backslash with nothing after it.
                throw new Exception('unknown \\');
            }
            $m = $str[$i + 1];
            if ($m !== 'u') {
                if (!isset(self::$_escapes[$m])) {
                    throw new Exception('unknown \\' . $m);
                }
                $ret .= self::$_escapes[$m];
                $i += 2;
                continue;
            }
            $code = $this->_read_hex4($str, $i + 2);
            $i += 6; // backslash, 'u' and four hex digits
            if ($code >= 0xD800 && $code <= 0xDBFF) {
                // High surrogate: it must be followed by a \uXXXX low surrogate.
                if (substr($str, $i, 2) !== '\\u') {
                    throw new Exception('unpaired surrogate in \\u escape');
                }
                $low = $this->_read_hex4($str, $i + 2);
                if ($low < 0xDC00 || $low > 0xDFFF) {
                    throw new Exception('unpaired surrogate in \\u escape');
                }
                $code = 0x10000 + (($code - 0xD800) << 10) + ($low - 0xDC00);
                $i += 6;
            } elseif ($code >= 0xDC00 && $code <= 0xDFFF) {
                throw new Exception('unpaired surrogate in \\u escape');
            }
            $ret .= $this->_utf8_chr($code);
        }
        return $ret;
    }

    /**
     * Convert the text of a bare literal into its PHP value.
     *
     * @param string $str One of true, false, null, or a JSON number (a
     *                    leading '+' is tolerated).
     * @return bool|null|int|float Integer literals that do not fit into a
     *                             PHP int are returned as float.
     * @throws Exception When $str is not a valid literal.
     */
    public function _lit_callback($str)
    {
        if ($str === 'true') {
            return true;
        }
        if ($str === 'false') {
            return false;
        }
        if ($str === 'null') {
            return null;
        }
        // "0" or a non-zero digit followed by any digits; this accepts
        // numbers with interior zeros such as 100 or 205.
        if (preg_match('/^[+-]?(?:0|[1-9]\d*)\z/', $str)) {
            $int = intval($str);
            $canonical = ltrim($str, '+');
            // intval() saturates on overflow; the round trip detects that.
            if (strval($int) === $canonical || $canonical === '-0') {
                return $int;
            }
            return floatval($str);
        }
        if (preg_match('/^[+-]?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?\z/', $str)) {
            return floatval($str);
        }
        throw new Exception("can't parse literal '$str'", 1);
    }
}
/**
 * Decode a JSON document using JsonDecoder.
 *
 * @param string $str JSON text.
 * @return mixed Whatever JsonDecoder::decode() returns for $str.
 */
function my_json_decode($str)
{
    $decoder = new JsonDecoder();

    return $decoder->decode($str);
}
// Demo: convert bare JSON literals (numbers, true, false, null) with
// JsonDecoder::_lit_callback() and dump each result.
echo "<h2>Literal</h2>";
$cases = array(
    '42',
    '0',
    '-3.10',
    '5.32e-2',
    '-5E6',
    'true',
    'false',
    'null',
);
foreach ($cases as $str) {
    // Escape the input and close the <pre> element with </pre>.
    echo '<pre>' . htmlspecialchars($str, ENT_QUOTES, 'UTF-8') . '</pre>';
    $jd = new JsonDecoder();
    $val = $jd->_lit_callback($str);
    // Keep the dump preformatted so its line breaks stay visible.
    echo '<pre>';
    var_dump($val);
    echo '</pre>';
    echo "<hr>";
}
// Demo: unescape JSON string bodies with JsonDecoder::_str_callback() and
// dump each result.
echo "<h2>String</h2>";
$cases = array(
    '"hello"',
    '"hi\nThis is a new line.\n\tReply \"\u5f53\u65f6\""',
    '""',
);
foreach ($cases as $str) {
    // Escape the input and close the <pre> element with </pre>.
    echo '<pre>' . htmlspecialchars($str, ENT_QUOTES, 'UTF-8') . '</pre>';
    $jd = new JsonDecoder();
    // _str_callback() expects the body without the surrounding quotes.
    $body = (string) substr($str, 1, strlen($str) - 2);
    $val = $jd->_str_callback($body);
    // Keep the dump preformatted so its line breaks stay visible.
    echo '<pre>';
    var_dump($val);
    echo '</pre>';
    echo "<hr>";
}
// Demo: decode JSON arrays with JsonDecoder::decode() and dump each result.
echo "<h2>Array</h2>";
$cases = array(
    '[42]',
    '[]',
    '["z",31e-2]',
    '["x",["z",[33,44],55]]',
);
foreach ($cases as $str) {
    // Escape the input and close the <pre> element with </pre>.
    echo '<pre>' . htmlspecialchars($str, ENT_QUOTES, 'UTF-8') . '</pre>';
    $jd = new JsonDecoder($str);
    $val = $jd->decode();
    // Keep the dump preformatted so its line breaks stay visible.
    echo '<pre>';
    var_dump($val);
    echo '</pre>';
    echo "<hr>";
}
// Demo: decode JSON objects with JsonDecoder::decode() and dump each result.
echo "<h2>Object</h2>";
$cases = array(
    '{"number":42}',
    '{}',
    '{"z":31e-2,"hello":"world"}',
    '{"x":{"z":[33,44]}}',
);
foreach ($cases as $str) {
    // Escape the input and close the <pre> element with </pre>.
    echo '<pre>' . htmlspecialchars($str, ENT_QUOTES, 'UTF-8') . '</pre>';
    $jd = new JsonDecoder($str);
    $val = $jd->decode();
    // Keep the dump preformatted so its line breaks stay visible.
    echo '<pre>';
    var_dump($val);
    echo '</pre>';
    echo "<hr>";
}

自己实现 json_decode()

JSON 中的值有 7 种形式

  • string
  • number
  • object
  • array
  • true
  • false
  • null

我们的目的是实现 PHP 自带的 json_decode($json_str) 函数。但一步到位会比较难,所以,我把这个问题分为三步。

第一步、写一个 json_number_decode($json_str) 函数,接受一个 数字 的 JSON 字符串,返回其值。

用例:

  • 42
  • 0
  • -3.10
  • 5.32e-2
  • -5E6

第二步、写一个 json_scalar_decode($json_str) 函数,接受一个 标量 的 JSON 字符串,返回其值。其中标量是指 string number true false null 这 5 种值;字符串的 JSON 表示只含 ASCII 字符(中文等非 ASCII 字符以 \uXXXX 转义给出),但解码后的值可能是中文。

用例:

  • "hello"
  • "hi\nThis is a new line.\n\tReply \"\u5f53\u65f6\""
  • ""
  • 5.8
  • true
  • false
  • null

第三步、写一个 my_json_decode($json_str) 函数,接受一个 JSON 字符串,返回其值。也就是 基本实现 PHP 的 json_decode() 函数。

用例:

  • {"key":"137026643717244","data":"\"\u5c55\u958b\u5c4f\u853d\u9078\u9805。当然中文表示也无关系\" action-type=\"feed_list_shield\"","extra":["hello", 4e8, null]}
  • [{"one":"bar"},42,3e6,false]

JSON 的全部标准请参见 http://www.json.org/

所谓 基本实现 是指

  1. 只接受正确的 JSON 字符串(为减少本题的难度,不需要做容错处理,不考虑健壮性)
  2. 数值范围以 PHP 为准
  3. 不考虑效率
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment