-
-
Save JanTvrdik/5b8f1e22f71bac00301e74a6ce46d387 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Three alternative implementations of string encoding for dumping, kept side
 * by side so they can be benchmarked against each other.
 *
 * All three share the same contract: a string that is not valid UTF-8, or that
 * contains characters outside the "printable" set, has its control bytes, high
 * bytes and backslashes replaced by escape sequences; an optional $maxLength
 * truncates the string and appends ' ... ' when something was cut off.
 */
class Dumper
{
    /** Matches any character that forces the whole string to be escaped. */
    const UNSAFE_CHAR_PATTERN = '#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{10FFFF}]#u';

    /**
     * Original implementation: decide binary vs. text first, then truncate
     * by bytes (binary) or by characters (text).
     *
     * @internal
     * @param  string   $s         string to encode
     * @param  int|null $maxLength maximum length (bytes for escaped strings, characters otherwise); falsy = no limit
     * @return string UTF-8
     */
    public static function encodeString($s, $maxLength = null)
    {
        $shortened = false;

        if (self::needsEscaping($s)) {
            // Escaped output is produced byte by byte, so the limit is applied in bytes.
            if ($shortened = ($maxLength && strlen($s) > $maxLength)) {
                $s = substr($s, 0, $maxLength);
            }
            $s = strtr($s, self::escapeTable());
        } elseif ($maxLength && $s !== '') {
            $full = $s;
            $s = self::truncateChars($s, $maxLength);
            $shortened = $s !== $full;
        }

        return $s . ($shortened ? ' ... ' : '');
    }

    /**
     * Variant "pr": truncate with mb_substr() first (when available) so the
     * validity check only has to scan the already shortened string.
     *
     * @internal
     * @param  string   $s         string to encode
     * @param  int|null $maxLength maximum length; falsy = no limit
     * @return string UTF-8
     */
    public static function encodeString2($s, $maxLength = null)
    {
        $shortened = false;
        $hasMbstring = function_exists('mb_substr');

        if ($maxLength && $hasMbstring) {
            $full = $s;
            $s = mb_substr($s, 0, $maxLength, 'UTF-8');
            $shortened = $s !== $full;
        }

        // Anchored, possessive form of the check: the string is "clean" only if
        // every character is in the allowed set and the subject is valid UTF-8.
        $isClean = preg_match('#^[\x09\x0A\x0D\x20-\x7E\xA0-\x{10FFFF}]*+\z#u', $s)
            && preg_last_error() === PREG_NO_ERROR;

        if (!$isClean) {
            // The character-based cut above may still exceed the limit in bytes.
            if ($maxLength && strlen($s) > $maxLength) {
                $s = substr($s, 0, $maxLength);
                $shortened = true;
            }
            $s = strtr($s, self::escapeTable());
        } elseif ($maxLength && strlen($s) > $maxLength && !$hasMbstring) {
            // No mbstring: fall back to the byte-wise character counter.
            $cut = self::truncateCharsBytewise($s, $maxLength);
            if ($cut !== $s) {
                $s = $cut;
                $shortened = true;
            }
        }

        return $s . ($shortened ? ' ... ' : '');
    }

    /**
     * Variant "dg": truncate first (by bytes when the input is not valid
     * UTF-8, by characters otherwise), append the ellipsis, then escape.
     *
     * @internal
     * @param  string   $s         string to encode
     * @param  int|null $maxLength maximum length; falsy = no limit
     * @return string UTF-8
     */
    public static function encodeString3($s, $maxLength = null)
    {
        if ($maxLength && $s !== '') {
            $full = $s;
            if (!preg_match('##u', $s)) {
                // The empty pattern always matches, so a falsy result means invalid UTF-8.
                $s = substr($s, 0, $maxLength);
            } else {
                $s = self::truncateChars($s, $maxLength);
            }
            if ($s !== $full) {
                $s .= ' ... ';
            }
        }

        // The ' ... ' suffix contains no escapable bytes, so escaping afterwards is safe.
        if (self::needsEscaping($s)) {
            $s = strtr($s, self::escapeTable());
        }

        return $s;
    }

    /**
     * Tells whether the string contains a character outside the allowed set
     * or could not be processed by PCRE as UTF-8.
     *
     * @param  string $s
     * @return bool
     */
    private static function needsEscaping($s)
    {
        return preg_match(self::UNSAFE_CHAR_PATTERN, $s) || preg_last_error() !== PREG_NO_ERROR;
    }

    /**
     * Returns the byte => escape sequence map for strtr(), built on first use.
     *
     * Bytes 0x00-0x1F and 0x7F-0xFF map to '\xNN' (lowercase hex); backslash,
     * CR, LF and TAB get their short escapes.
     *
     * @return array<string, string>
     */
    private static function escapeTable()
    {
        static $table;

        if ($table === null) {
            $table = [];
            foreach (array_merge(range("\x00", "\x1F"), range("\x7F", "\xFF")) as $ch) {
                $table[$ch] = '\x' . str_pad(dechex(ord($ch)), 2, '0', STR_PAD_LEFT);
            }
            $table['\\'] = '\\\\';
            $table["\r"] = '\r';
            $table["\n"] = '\n';
            $table["\t"] = '\t';
        }

        return $table;
    }

    /**
     * Truncates a non-empty UTF-8 string to $maxLength characters, using
     * mb_substr() when the mbstring extension is loaded.
     *
     * @param  string $s         non-empty string
     * @param  int    $maxLength
     * @return string
     */
    private static function truncateChars($s, $maxLength)
    {
        if (function_exists('mb_substr')) {
            return mb_substr($s, 0, $maxLength, 'UTF-8');
        }

        return self::truncateCharsBytewise($s, $maxLength);
    }

    /**
     * Truncates a non-empty string to $maxLength characters without mbstring.
     *
     * Every byte outside 0x80-0xBF (i.e. not a UTF-8 continuation byte) is
     * counted as the start of a character; the string is cut right before the
     * character that would exceed the limit.
     *
     * @param  string $s         non-empty string
     * @param  int    $maxLength
     * @return string the input itself when nothing had to be cut
     */
    private static function truncateCharsBytewise($s, $maxLength)
    {
        $offset = $chars = 0;
        do {
            $isCharStart = $s[$offset] < "\x80" || $s[$offset] >= "\xC0";
            if ($isCharStart && ++$chars > $maxLength) {
                return substr($s, 0, $offset);
            }
        } while (isset($s[++$offset]));

        return $s;
    }
}
// Number of iterations each timed benchmark runs per data set.
$testCount = 1000;

// Every data set is the argument list [$count, $isBinary, $s, $maxLength]
// that gets spread into a test callback.
$dataSets = [
    // Plain ASCII of increasing length.
    '100 -> 100' => [$testCount, false, str_repeat('a', 100), 100],
    '500 -> 100' => [$testCount, false, str_repeat('a', 500), 100],
    '1e5 -> 100' => [$testCount, false, str_repeat('a', 100000), 100],

    // Valid UTF-8 made of a two-byte character, optionally shifted by 1-3 ASCII bytes.
    '1e5 -> 100 (UTF-8)' => [$testCount, false, str_repeat("\xC4\x9B", 100000), 100],
    '1e5 -> 100 (UTF-8 mis-aligned 1)' => [$testCount, false, 'a' . str_repeat("\xC4\x9B", 100000), 100],
    '1e5 -> 100 (UTF-8 mis-aligned 2)' => [$testCount, false, 'aa' . str_repeat("\xC4\x9B", 100000), 100],
    '1e5 -> 100 (UTF-8 mis-aligned 3)' => [$testCount, false, 'aaa' . str_repeat("\xC4\x9B", 100000), 100],
];

// Byte sequences that are not well-formed UTF-8, keyed by the suffix used in
// the data set name.
$invalidSequences = [
    1 => "\xf0\x90\x28\xbc",         // 4-byte sequence, bad 3rd byte (same bytes as #6)
    2 => "\xc3\x28",                 // 2-byte lead followed by a non-continuation byte
    3 => "\xa0\xa1",                 // continuation bytes without a lead byte
    4 => "\xe2\x28\xa1",             // 3-byte sequence, bad 2nd byte
    5 => "\xe2\x82\x28",             // 3-byte sequence, bad 3rd byte
    6 => "\xf0\x90\x28\xbc",         // 4-byte sequence, bad 3rd byte
    7 => "\xf8\xa1\xa1\xa1\xa1",     // 5-byte form, not allowed in UTF-8
    8 => "\xfc\xa1\xa1\xa1\xa1\xa1", // 6-byte form, not allowed in UTF-8
    9 => "\xed\xa0\x80",             // encoded surrogate U+D800
    'X' => "\xf0\x82\x82\xac",       // overlong encoding of U+20AC
];

foreach ($invalidSequences as $label => $bytes) {
    $dataSets["1e5 -> 100 (binary {$label})"] = [$testCount, true, 'a' . str_repeat($bytes, 100000), 100];
}
unset($invalidSequences, $label, $bytes);
// Benchmark callbacks. Each receives one data set spread as
// ($count, $isBinary, $s, $maxLength).
$tests = [
    // Sanity check (its timing is not meaningful): both alternative
    // implementations must return exactly what the original returns.
    // The comparison runs without a limit and with the data set's $maxLength,
    // because truncation is the only place where the variants differ.
    'consistency' => static function ($count, $isBinary, $s, $maxLength) {
        foreach ([null, $maxLength] as $limit) {
            $expected = Dumper::encodeString($s, $limit);
            $limitLabel = $limit === null ? 'none' : (string) $limit;
            assert(
                Dumper::encodeString2($s, $limit) === $expected,
                "encodeString2 differs from encodeString (maxLength: {$limitLabel})"
            );
            assert(
                Dumper::encodeString3($s, $limit) === $expected,
                "encodeString3 differs from encodeString (maxLength: {$limitLabel})"
            );
        }
    },

    // Timed run of the original implementation.
    'original' => static function ($count, $isBinary, $s, $maxLength) {
        while ($count--) {
            Dumper::encodeString($s, $maxLength);
        }
    },

    // Timed run of the "truncate first" variant.
    'pr' => static function ($count, $isBinary, $s, $maxLength) {
        while ($count--) {
            Dumper::encodeString2($s, $maxLength);
        }
    },

    // Timed run of the "truncate, append ellipsis, then escape" variant.
    'dg' => static function ($count, $isBinary, $s, $maxLength) {
        while ($count--) {
            Dumper::encodeString3($s, $maxLength);
        }
    },
];
// Column width: the longest data set name plus two spaces of padding.
$padLength = 2 + max(array_map('strlen', array_keys($dataSets)));

// Run every test against every data set and print the wall-clock time in milliseconds.
foreach ($tests as $testName => $test) {
    printf("Test %s:\n", $testName);

    foreach ($dataSets as $dataSetName => $dataSet) {
        $startedAt = microtime(true);
        $test(...$dataSet);
        $elapsed = microtime(true) - $startedAt;

        $label = str_pad($dataSetName, $padLength);
        printf(" %s%5.0f ms\n", $label, $elapsed * 1e3);
    }

    printf("\n");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment