Created
August 5, 2015 16:05
-
-
Save disem/d5f76d716303a1ec73fe to your computer and use it in GitHub Desktop.
CJSON benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Load the benchmark fixture. The included file is expected to `return` an
// array (presumably a list of world cities, going by its name).
//
// `require` is used instead of `require_once` on purpose: `require_once`
// evaluates to boolean `true` when the file has already been included during
// this request, which would silently replace the data set with `true`.
$array = require 'world_cities_array.php';

// Report the size of the data set so benchmark runs can be compared.
var_dump("Test case elements count: " . count($array));
/**
 * CJSON converts PHP values to JSON text and JSON text back to PHP values.
 *
 * Both directions prefer the native json_encode()/json_decode() functions and
 * fall back to a hand-written implementation. The fallback decoder is lenient:
 * it also accepts single-quoted strings, unquoted object keys and C-style
 * comments, which the native decoder rejects.
 */
class CJSON
{
    /** Parser-stack marker: a slice (one comma-separated element) starts here. */
    const JSON_SLICE = 1;
    /** Parser-stack marker: currently inside a quoted string. */
    const JSON_IN_STR = 2;
    /** Parser-stack marker: currently inside an array literal. */
    const JSON_IN_ARR = 4;
    /** Parser-stack marker: currently inside an object literal. */
    const JSON_IN_OBJ = 8;
    /** Parser-stack marker: currently inside a block comment. */
    const JSON_IN_CMT = 16;

    /**
     * Strips comments and surrounding whitespace from a JSON fragment.
     *
     * Removes whole-line `// ...` comments, a block comment at the very start
     * of the string and a block comment at the very end of the string.
     *
     * @param string $str JSON fragment
     * @return string the reduced fragment
     */
    protected static function reduceString($str)
    {
        $str = preg_replace(array(
            // eliminate single line comments in '// ...' form
            '#^\s*//(.+)$#m',
            // eliminate multi-line comments in '/* ... */' form, at start of string
            '#^\s*/\*(.+)\*/#Us',
            // eliminate multi-line comments in '/* ... */' form, at end of string
            '#/\*(.+)\*/\s*$#Us',
        ), '', $str);

        // preg_replace() yields null on a PCRE error; cast so trim() always gets a string.
        return trim((string)$str);
    }

    /**
     * Encodes one object member as `"name":value`.
     *
     * @param mixed $name member name (converted to string)
     * @param mixed $value member value
     * @return string JSON fragment
     */
    protected static function nameValue($name, $value)
    {
        return self::encode(strval($name)) . ':' . self::encode($value);
    }

    /**
     * Encodes a PHP value as JSON.
     *
     * Arrays whose keys are exactly 0..n-1 become JSON arrays; every other
     * non-empty array becomes a JSON object. Objects are encoded through
     * JsonSerializable::jsonSerialize(), by iteration (Traversable) or from
     * their accessible properties. Unsupported types encode to ''.
     *
     * Note: integers are returned as PHP integers rather than strings; they
     * still concatenate correctly into the surrounding JSON text.
     *
     * @param mixed $var value to encode
     * @return string|int|false JSON representation (false if native
     *         json_encode() rejects a string, e.g. invalid UTF-8)
     */
    public static function encode($var)
    {
        switch (gettype($var)) {
            case 'boolean':
                return $var ? 'true' : 'false';

            case 'NULL':
                return 'null';

            case 'integer':
                return (int)$var;

            case 'double':
            case 'float':
                // locale-independent representation
                return str_replace(',', '.', (string)(float)$var);

            case 'string':
                // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
                if (function_exists('json_encode')) {
                    return json_encode($var);
                }
                return self::encodeStringFallback($var);

            case 'array':
                /*
                 * As per the JSON spec, if any array key is not an integer we
                 * must treat the whole array as an object. Sparse or
                 * re-ordered numeric keys are treated as objects as well, so
                 * the keys survive and JS engines do not allocate huge arrays.
                 */
                if (count($var) && (array_keys($var) !== range(0, count($var) - 1))) {
                    return '{' . self::encodeMembers($var) . '}';
                }
                // treat it like a regular array
                return '[' . join(',', array_map(array(__CLASS__, 'encode'), $var)) . ']';

            case 'object':
                // instanceof is false when the interface is unknown, so this
                // also works when user code defines JsonSerializable itself.
                if (interface_exists('JsonSerializable', false) && $var instanceof JsonSerializable) {
                    return self::encode($var->jsonSerialize());
                }
                if ($var instanceof Traversable) {
                    $vars = array();
                    foreach ($var as $k => $v) {
                        $vars[$k] = $v;
                    }
                } else {
                    $vars = get_object_vars($var);
                }
                return '{' . self::encodeMembers($vars) . '}';

            default:
                return '';
        }
    }

    /**
     * Decodes JSON text into a PHP value.
     *
     * The native json_decode() is tried first. When it yields null (either a
     * literal `null` or input it cannot parse) the lenient fallback parser is
     * used instead.
     *
     * @param string $str JSON text
     * @param bool $useArray decode JSON objects to associative arrays (true)
     *        or to stdClass instances (false)
     * @return mixed decoded value; null when the input cannot be interpreted
     */
    public static function decode($str, $useArray = true)
    {
        if (function_exists('json_decode')) {
            $json = json_decode($str, $useArray);
            // null means "literal null" or "native parser gave up"; either
            // way the fallback below produces the right answer.
            if ($json !== null) {
                return $json;
            }
        }

        $str = self::reduceString($str);

        switch (strtolower($str)) {
            case 'true':
                return true;
            case 'false':
                return false;
            case 'null':
                return null;
        }

        if (is_numeric($str)) {
            // Return float or int, as appropriate
            return ((float)$str == (int)$str) ? (int)$str : (float)$str;
        }

        if (preg_match('/^("|\').+(\1)$/s', $str, $m) && $m[1] == $m[2]) {
            return self::decodeQuotedString($str);
        }

        if (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
            return self::decodeStructure($str, $useArray);
        }

        return null;
    }

    /**
     * Encodes the members of an associative array as `"k":v,"k2":v2`.
     *
     * @param array $members name => value map
     * @return string comma-separated members without the surrounding braces
     */
    protected static function encodeMembers($members)
    {
        return join(',', array_map(
            array(__CLASS__, 'nameValue'),
            array_keys($members),
            array_values($members)
        ));
    }

    /**
     * Pure-PHP string encoder, used only when json_encode() is unavailable.
     *
     * Escapes control characters, quote, slash and backslash, copies printable
     * ASCII and writes every multi-byte UTF-8 sequence as \uXXXX escapes.
     * Any other byte (other control characters, stray continuation bytes) is
     * dropped.
     *
     * @param string $var ASCII or UTF-8 string
     * @return string quoted JSON string
     */
    protected static function encodeStringFallback($var)
    {
        $ascii = '';
        $length = strlen($var);

        for ($c = 0; $c < $length; ++$c) {
            $byte = ord($var[$c]);

            switch (true) {
                case $byte == 0x08:
                    $ascii .= '\b';
                    break;
                case $byte == 0x09:
                    $ascii .= '\t';
                    break;
                case $byte == 0x0A:
                    $ascii .= '\n';
                    break;
                case $byte == 0x0C:
                    $ascii .= '\f';
                    break;
                case $byte == 0x0D:
                    $ascii .= '\r';
                    break;
                case $byte == 0x22:
                case $byte == 0x2F:
                case $byte == 0x5C:
                    // double quote, slash, backslash
                    $ascii .= '\\' . $var[$c];
                    break;
                case ($byte >= 0x20) && ($byte <= 0x7F):
                    // characters U-00000000 - U-0000007F (same as ASCII)
                    $ascii .= $var[$c];
                    break;
                default:
                    $seqLength = self::utf8SequenceLength($byte);
                    if ($seqLength > 0) {
                        // substr() never reads past the end of a truncated sequence
                        $ascii .= self::unicodeEscape(substr($var, $c, $seqLength));
                        $c += $seqLength - 1;
                    }
                    break;
            }
        }

        return '"' . $ascii . '"';
    }

    /**
     * Decodes a single- or double-quoted string literal (fallback parser).
     *
     * @param string $str the literal including its delimiters
     * @return string decoded UTF-8 string
     */
    protected static function decodeQuotedString($str)
    {
        $delim = substr($str, 0, 1);
        $chrs = substr($str, 1, -1);
        $utf8 = '';
        $length = strlen($chrs);

        for ($c = 0; $c < $length; ++$c) {
            $pair = substr($chrs, $c, 2);
            $byte = ord($chrs[$c]);

            switch (true) {
                case $pair === '\b':
                    $utf8 .= chr(0x08);
                    ++$c;
                    break;
                case $pair === '\t':
                    $utf8 .= chr(0x09);
                    ++$c;
                    break;
                case $pair === '\n':
                    $utf8 .= chr(0x0A);
                    ++$c;
                    break;
                case $pair === '\f':
                    $utf8 .= chr(0x0C);
                    ++$c;
                    break;
                case $pair === '\r':
                    $utf8 .= chr(0x0D);
                    ++$c;
                    break;
                case $pair === '\\"':
                case $pair === '\\\'':
                case $pair === '\\\\':
                case $pair === '\\/':
                    // Unescape, except for the "other" quote character: there
                    // only the backslash is dropped here and the quote itself
                    // is copied on the next iteration.
                    if (($delim == '"' && $pair != '\\\'') ||
                        ($delim == "'" && $pair != '\\"')
                    ) {
                        $utf8 .= $chrs[++$c];
                    }
                    break;
                case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)):
                    // single, escaped unicode character (each \uXXXX is
                    // converted on its own)
                    $utf16 = chr(hexdec(substr($chrs, ($c + 2), 2)))
                        . chr(hexdec(substr($chrs, ($c + 4), 2)));
                    $utf8 .= self::utf16beToUTF8($utf16);
                    $c += 5;
                    break;
                case ($byte >= 0x20) && ($byte <= 0x7F):
                    $utf8 .= $chrs[$c];
                    break;
                default:
                    // multi-byte UTF-8 sequences are copied through verbatim
                    $seqLength = self::utf8SequenceLength($byte);
                    if ($seqLength > 0) {
                        $utf8 .= substr($chrs, $c, $seqLength);
                        $c += $seqLength - 1;
                    }
                    break;
            }
        }

        return $utf8;
    }

    /**
     * Decodes an array (`[...]`) or object (`{...}`) literal (fallback parser).
     *
     * The content is scanned once with a stack describing what the scanner is
     * currently inside of (string, array, object, comment). A comma seen at
     * slice level, or the end of input, closes the current element, which is
     * then decoded recursively.
     *
     * @param string $str the literal including its brackets/braces
     * @param bool $useArray decode objects to arrays instead of stdClass
     * @return array|stdClass decoded structure
     */
    protected static function decodeStructure($str, $useArray)
    {
        $isList = ($str[0] === '[');
        $stk = array($isList ? self::JSON_IN_ARR : self::JSON_IN_OBJ);
        $result = ($isList || $useArray) ? array() : new stdClass();

        $stk[] = array('what' => self::JSON_SLICE, 'where' => 0, 'delim' => false);

        $chrs = self::reduceString(substr($str, 1, -1));
        if ($chrs === '') {
            return $result;
        }

        // contexts in which a nested array, object or comment may start
        $nestable = array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ);
        $length = strlen($chrs);

        // <= on purpose: the extra iteration flushes the final element
        for ($c = 0; $c <= $length; ++$c) {
            $top = end($stk);
            $what = $top['what'];
            $pair = substr($chrs, $c, 2);
            $char = ($c < $length) ? $chrs[$c] : '';

            if (($c == $length) || (($char == ',') && ($what == self::JSON_SLICE))) {
                // found a comma that is not inside a string, array, etc.,
                // OR we've reached the end of the character list
                $slice = substr($chrs, $top['where'], ($c - $top['where']));
                $stk[] = array('what' => self::JSON_SLICE, 'where' => ($c + 1), 'delim' => false);

                if ($isList) {
                    // we are in an array, so just append the element
                    $result[] = self::decode($slice, $useArray);
                } elseif (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // "name":value pair
                    $key = self::decode($parts[1], $useArray);
                    $val = self::decode($parts[2], $useArray);
                    if ($useArray) {
                        $result[$key] = $val;
                    } else {
                        $result->$key = $val;
                    }
                } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // name:value pair, where name is unquoted
                    $key = $parts[1];
                    $val = self::decode($parts[2], $useArray);
                    if ($useArray) {
                        $result[$key] = $val;
                    } else {
                        $result->$key = $val;
                    }
                }
            } elseif ((($char == '"') || ($char == "'")) && ($what != self::JSON_IN_STR)) {
                // found a quote, and we are not inside a string
                $stk[] = array('what' => self::JSON_IN_STR, 'where' => $c, 'delim' => $char);
            } elseif (($char === $top['delim']) &&
                ($what == self::JSON_IN_STR) &&
                (($chrs[$c - 1] != "\\") || ($chrs[$c - 2] == "\\"))
            ) {
                // found a quote, we're in a string, and it's not escaped
                // (a preceding backslash that is itself escaped does not count)
                array_pop($stk);
            } elseif (($char == '[') && in_array($what, $nestable, true)) {
                // found a left-bracket, and we are in an array, object, or slice
                $stk[] = array('what' => self::JSON_IN_ARR, 'where' => $c, 'delim' => false);
            } elseif (($char == ']') && ($what == self::JSON_IN_ARR)) {
                // found a right-bracket, and we're in an array
                array_pop($stk);
            } elseif (($char == '{') && in_array($what, $nestable, true)) {
                // found a left-brace, and we are in an array, object, or slice
                $stk[] = array('what' => self::JSON_IN_OBJ, 'where' => $c, 'delim' => false);
            } elseif (($char == '}') && ($what == self::JSON_IN_OBJ)) {
                // found a right-brace, and we're in an object
                array_pop($stk);
            } elseif (($pair == '/*') && in_array($what, $nestable, true)) {
                // found a comment start, and we are in an array, object, or slice
                $stk[] = array('what' => self::JSON_IN_CMT, 'where' => $c, 'delim' => false);
                $c++;
            } elseif (($pair == '*/') && ($what == self::JSON_IN_CMT)) {
                // found a comment end, and we're in one now: blank the whole
                // comment out so it never reaches the recursive decode
                array_pop($stk);
                $c++;
                for ($i = $top['where']; $i <= $c; ++$i) {
                    $chrs = substr_replace($chrs, ' ', $i, 1);
                }
            }
        }

        return $result;
    }

    /**
     * Returns the byte length of the UTF-8 sequence introduced by a lead byte.
     *
     * @param int $byte value of the first byte (0-255)
     * @return int 2-6 for a multi-byte lead byte, 0 for anything else
     */
    protected static function utf8SequenceLength($byte)
    {
        if (($byte & 0xE0) == 0xC0) {
            return 2; // U-00000080 - U-000007FF, mask 110XXXXX
        }
        if (($byte & 0xF0) == 0xE0) {
            return 3; // U-00000800 - U-0000FFFF, mask 1110XXXX
        }
        if (($byte & 0xF8) == 0xF0) {
            return 4; // U-00010000 - U-001FFFFF, mask 11110XXX
        }
        if (($byte & 0xFC) == 0xF8) {
            return 5; // U-00200000 - U-03FFFFFF, mask 111110XX
        }
        if (($byte & 0xFE) == 0xFC) {
            return 6; // U-04000000 - U-7FFFFFFF, mask 1111110X
        }
        return 0;
    }

    /**
     * Writes one UTF-8 encoded character as JSON \uXXXX escape(s).
     *
     * Characters outside the Basic Multilingual Plane produce a surrogate
     * pair; sequences that cannot be converted produce \ufffd.
     *
     * @param string $char bytes of a single UTF-8 character
     * @return string one or two \uXXXX escapes
     */
    protected static function unicodeEscape($char)
    {
        $hex = bin2hex(self::utf8ToUTF16BE($char));
        if ($hex === '') {
            return '\ufffd';
        }
        return '\u' . implode('\u', str_split($hex, 4));
    }

    /**
     * Converts one UTF-8 encoded character to UTF-16BE.
     *
     * @param string $utf8 bytes of a single UTF-8 character (1-4 bytes)
     * @return string 2 bytes, 4 bytes (surrogate pair), or '' when the input
     *         is not a convertible sequence
     */
    protected static function utf8ToUTF16BE($utf8)
    {
        switch (strlen($utf8)) {
            case 1:
                $cp = ord($utf8[0]);
                if ($cp > 0x7F) {
                    return ''; // lone lead/continuation byte
                }
                break;
            case 2:
                $cp = ((ord($utf8[0]) & 0x1F) << 6)
                    | (ord($utf8[1]) & 0x3F);
                break;
            case 3:
                $cp = ((ord($utf8[0]) & 0x0F) << 12)
                    | ((ord($utf8[1]) & 0x3F) << 6)
                    | (ord($utf8[2]) & 0x3F);
                break;
            case 4:
                $cp = ((ord($utf8[0]) & 0x07) << 18)
                    | ((ord($utf8[1]) & 0x3F) << 12)
                    | ((ord($utf8[2]) & 0x3F) << 6)
                    | (ord($utf8[3]) & 0x3F);
                break;
            default:
                return '';
        }

        if ($cp < 0x10000) {
            return chr($cp >> 8) . chr($cp & 0xFF);
        }
        if ($cp > 0x10FFFF) {
            return '';
        }

        // encode as a high/low surrogate pair
        $cp -= 0x10000;
        $high = 0xD800 | ($cp >> 10);
        $low = 0xDC00 | ($cp & 0x3FF);
        return chr($high >> 8) . chr($high & 0xFF) . chr($low >> 8) . chr($low & 0xFF);
    }

    /**
     * Converts one UTF-16BE code unit to UTF-8.
     *
     * @param string $utf16 two bytes, big-endian
     * @return string UTF-8 bytes, or '' when fewer than two bytes are given
     */
    protected static function utf16beToUTF8($utf16)
    {
        if (strlen($utf16) < 2) {
            return '';
        }

        $unit = (ord($utf16[0]) << 8) | ord($utf16[1]);

        if ($unit < 0x80) {
            return chr($unit);
        }
        if ($unit < 0x800) {
            return chr(0xC0 | ($unit >> 6))
                . chr(0x80 | ($unit & 0x3F));
        }
        return chr(0xE0 | ($unit >> 12))
            . chr(0x80 | (($unit >> 6) & 0x3F))
            . chr(0x80 | ($unit & 0x3F));
    }
}
/**
 * Benchmark case: round-trips a value through PHP's native JSON functions.
 *
 * @param mixed $array value to encode and decode again
 * @param bool $assoc when true, JSON objects are decoded to associative
 *        arrays instead of stdClass instances. Defaults to false, which is
 *        json_decode()'s own default; pass true to make the decoded shape
 *        match a decoder that produces arrays.
 * @return mixed the decoded value
 */
function nativeCase($array, $assoc = false)
{
    return json_decode(json_encode($array), $assoc);
}
/**
 * Benchmark case: round-trips a value through CJSON::encode() and
 * CJSON::decode().
 *
 * @param mixed $array value to encode and decode again
 * @return mixed whatever CJSON::decode() returns for the encoded text
 */
function cjsonCase($array)
{
    return CJSON::decode(CJSON::encode($array));
}
// Run exactly one benchmark case between the two timestamps; switch cases by
// moving the comment marker.
$time_start = microtime(true);

nativeCase($array);
//cjsonCase($array);

$time_end = microtime(true);

// Wall-clock duration of the selected case, in seconds.
$execution_time = $time_end - $time_start;
var_dump("Execution time: " . $execution_time);

// Memory currently allocated from the system, then the peak for this run,
// both converted from bytes to megabytes (10^6 bytes).
foreach (array(memory_get_usage(true), memory_get_peak_usage(true)) as $bytes) {
    var_dump(($bytes / 1000000) . " MB");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment