Skip to content

Instantly share code, notes, and snippets.

@JanTvrdik
Created September 27, 2016 08:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JanTvrdik/5b8f1e22f71bac00301e74a6ce46d387 to your computer and use it in GitHub Desktop.
<?php
/**
 * Three alternative implementations of the same string encoder, kept side by
 * side so that they can be benchmarked against each other.
 *
 * Every variant turns a string into a dump-friendly form: bytes 0x00-0x1F and
 * 0x7F-0xFF become `\xHH` (lower-case hex), a backslash is doubled, and CR, LF
 * and TAB become `\r`, `\n` and `\t`. Escaping is applied only when the string
 * contains something other than TAB/LF/CR, printable ASCII or code points
 * U+00A0 and above, or when PCRE reports an error (e.g. malformed UTF-8).
 * When a string is cut to $maxLength, ' ... ' is appended.
 *
 * The variants differ only in the order in which they classify and truncate.
 */
class Dumper
{
    /**
     * Variant 1: classify the whole input first, truncate afterwards.
     *
     * Input that needs escaping is cut to $maxLength bytes and then escaped.
     * Other input is cut to $maxLength characters, using mb_substr() when it
     * is available and a manual byte walk otherwise.
     *
     * @internal
     * @param  string   $s          string to encode
     * @param  int|null $maxLength  length limit; a falsy value disables truncation
     * @return string UTF-8
     */
    public static function encodeString($s, $maxLength = NULL)
    {
        static $escapes;
        if ($escapes === NULL) {
            $escapes = [];
            $rawBytes = array_merge(range("\x00", "\x1F"), range("\x7F", "\xFF"));
            foreach ($rawBytes as $byte) {
                $hex = str_pad(dechex(ord($byte)), 2, '0', STR_PAD_LEFT);
                $escapes[$byte] = '\x' . $hex;
            }
            // Readable escapes replace the generic hex form for these bytes.
            $escapes['\\'] = '\\\\';
            $escapes["\r"] = '\r';
            $escapes["\n"] = '\n';
            $escapes["\t"] = '\t';
        }

        $suffix = '';
        $looksBinary = preg_match('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{10FFFF}]#u', $s) || preg_last_error();

        if ($looksBinary) {
            // Binary data: the limit is applied to bytes.
            if ($maxLength && strlen($s) > $maxLength) {
                $s = substr($s, 0, $maxLength);
                $suffix = ' ... ';
            }
            $s = strtr($s, $escapes);

        } elseif ($maxLength && $s !== '') {
            // Text: the limit is applied to characters.
            if (function_exists('mb_substr')) {
                $head = mb_substr($s, 0, $maxLength, 'UTF-8');
                if ($head !== $s) {
                    $suffix = ' ... ';
                }
                $s = $head;
            } else {
                // Count only bytes that start a character; 0x80-0xBF are
                // UTF-8 continuation bytes and are skipped.
                $offset = 0;
                $charCount = 0;
                do {
                    $byte = $s[$offset];
                    $startsCharacter = $byte < "\x80" || $byte >= "\xC0";
                    if ($startsCharacter && ++$charCount > $maxLength) {
                        $s = substr($s, 0, $offset);
                        $suffix = ' ... ';
                        break;
                    }
                    $offset++;
                } while (isset($s[$offset]));
            }
        }

        return $s . $suffix;
    }

    /**
     * Variant 2: truncate with mb_substr() first, classify the result afterwards.
     *
     * When mb_substr() exists the input is cut to $maxLength characters before
     * the regular expression runs, so the pattern only sees the shortened
     * string. Input that then needs escaping is additionally cut to $maxLength
     * bytes. Without mb_substr() the manual byte walk is used for text that is
     * longer than $maxLength bytes.
     *
     * @internal
     * @param  string   $s          string to encode
     * @param  int|null $maxLength  length limit; a falsy value disables truncation
     * @return string UTF-8
     */
    public static function encodeString2($s, $maxLength = NULL)
    {
        static $escapes;
        if ($escapes === NULL) {
            $escapes = [];
            $rawBytes = array_merge(range("\x00", "\x1F"), range("\x7F", "\xFF"));
            foreach ($rawBytes as $byte) {
                $hex = str_pad(dechex(ord($byte)), 2, '0', STR_PAD_LEFT);
                $escapes[$byte] = '\x' . $hex;
            }
            // Readable escapes replace the generic hex form for these bytes.
            $escapes['\\'] = '\\\\';
            $escapes["\r"] = '\r';
            $escapes["\n"] = '\n';
            $escapes["\t"] = '\t';
        }

        $suffix = '';
        $useMbstring = $maxLength && function_exists('mb_substr');
        if ($useMbstring) {
            $head = mb_substr($s, 0, $maxLength, 'UTF-8');
            if ($head !== $s) {
                $suffix = ' ... ';
            }
            $s = $head;
        }

        // The possessive, fully anchored pattern accepts only strings made
        // entirely of allowed characters.
        $isPlainText = preg_match('#^[\x09\x0A\x0D\x20-\x7E\xA0-\x{10FFFF}]*+\z#u', $s) && !preg_last_error();

        if (!$isPlainText) {
            if ($maxLength && strlen($s) > $maxLength) {
                $s = substr($s, 0, $maxLength);
                $suffix = ' ... ';
            }
            $s = strtr($s, $escapes);

        } elseif ($maxLength && !$useMbstring && strlen($s) > $maxLength) {
            // No mbstring: count only bytes that start a character; 0x80-0xBF
            // are UTF-8 continuation bytes and are skipped.
            $offset = 0;
            $charCount = 0;
            do {
                $byte = $s[$offset];
                $startsCharacter = $byte < "\x80" || $byte >= "\xC0";
                if ($startsCharacter && ++$charCount > $maxLength) {
                    $s = substr($s, 0, $offset);
                    $suffix = ' ... ';
                    break;
                }
                $offset++;
            } while (isset($s[$offset]));
        }

        return $s . $suffix;
    }

    /**
     * Variant 3: truncate first (choosing bytes or characters by a UTF-8
     * validity check), escape afterwards.
     *
     * An empty pattern with the `u` modifier is used purely to test whether
     * the input is valid UTF-8. Invalid input is cut to $maxLength bytes,
     * valid input to $maxLength characters. ' ... ' is appended before the
     * escaping step whenever truncation changed the string.
     *
     * @internal
     * @param  string   $s          string to encode
     * @param  int|null $maxLength  length limit; a falsy value disables truncation
     * @return string UTF-8
     */
    public static function encodeString3($s, $maxLength = NULL)
    {
        static $escapes;
        if ($escapes === NULL) {
            $escapes = [];
            $rawBytes = array_merge(range("\x00", "\x1F"), range("\x7F", "\xFF"));
            foreach ($rawBytes as $byte) {
                $hex = str_pad(dechex(ord($byte)), 2, '0', STR_PAD_LEFT);
                $escapes[$byte] = '\x' . $hex;
            }
            // Readable escapes replace the generic hex form for these bytes.
            $escapes['\\'] = '\\\\';
            $escapes["\r"] = '\r';
            $escapes["\n"] = '\n';
            $escapes["\t"] = '\t';
        }

        if ($maxLength && $s !== '') {
            $untruncated = $s;

            if (preg_match('##u', $s)) {
                // Valid UTF-8: the limit is applied to characters.
                if (function_exists('mb_substr')) {
                    $s = mb_substr($s, 0, $maxLength, 'UTF-8');
                } else {
                    // Count only bytes that start a character; 0x80-0xBF are
                    // UTF-8 continuation bytes and are skipped.
                    $offset = 0;
                    $charCount = 0;
                    do {
                        $byte = $s[$offset];
                        $startsCharacter = $byte < "\x80" || $byte >= "\xC0";
                        if ($startsCharacter && ++$charCount > $maxLength) {
                            $s = substr($s, 0, $offset);
                            break;
                        }
                        $offset++;
                    } while (isset($s[$offset]));
                }
            } else {
                // Not valid UTF-8: the limit is applied to bytes.
                $s = substr($s, 0, $maxLength);
            }

            if ($s !== $untruncated) {
                $s .= ' ... ';
            }
        }

        $needsEscaping = preg_match('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{10FFFF}]#u', $s) || preg_last_error();

        return $needsEscaping ? strtr($s, $escapes) : $s;
    }
}
// ---------------------------------------------------------------------------
// Benchmark harness: runs every test closure against every data set and prints
// the wall-clock time of each combination in milliseconds.
// ---------------------------------------------------------------------------

// Number of encode calls performed per data set by the timing tests.
$testCount = 1000;

// Shared sizes: truncation limit handed to the encoders, and the repeat count
// used to build the large inputs.
$limit = 100;
$bulk = 1e5;

// Each data set is the positional argument list of a test closure:
// [iterations, binary flag, input string, max length].
// NOTE(review): none of the closures below reads the binary flag.
// NOTE(review): 'binary 1' and 'binary 6' repeat the same byte sequence --
// confirm whether one of them was meant to be a different sequence.
$dataSets = [
    '100 -> 100' => [$testCount, false, str_repeat('a', 100), $limit],
    '500 -> 100' => [$testCount, false, str_repeat('a', 500), $limit],
    '1e5 -> 100' => [$testCount, false, str_repeat('a', $bulk), $limit],
    '1e5 -> 100 (UTF-8)' => [$testCount, false, str_repeat("\xC4\x9B", $bulk), $limit],
    '1e5 -> 100 (UTF-8 mis-aligned 1)' => [$testCount, false, 'a' . str_repeat("\xC4\x9B", $bulk), $limit],
    '1e5 -> 100 (UTF-8 mis-aligned 2)' => [$testCount, false, 'aa' . str_repeat("\xC4\x9B", $bulk), $limit],
    '1e5 -> 100 (UTF-8 mis-aligned 3)' => [$testCount, false, 'aaa' . str_repeat("\xC4\x9B", $bulk), $limit],
    '1e5 -> 100 (binary 1)' => [$testCount, true, 'a' . str_repeat("\xf0\x90\x28\xbc", $bulk), $limit],
    '1e5 -> 100 (binary 2)' => [$testCount, true, 'a' . str_repeat("\xc3\x28", $bulk), $limit],
    '1e5 -> 100 (binary 3)' => [$testCount, true, 'a' . str_repeat("\xa0\xa1", $bulk), $limit],
    '1e5 -> 100 (binary 4)' => [$testCount, true, 'a' . str_repeat("\xe2\x28\xa1", $bulk), $limit],
    '1e5 -> 100 (binary 5)' => [$testCount, true, 'a' . str_repeat("\xe2\x82\x28", $bulk), $limit],
    '1e5 -> 100 (binary 6)' => [$testCount, true, 'a' . str_repeat("\xf0\x90\x28\xbc", $bulk), $limit],
    '1e5 -> 100 (binary 7)' => [$testCount, true, 'a' . str_repeat("\xf8\xa1\xa1\xa1\xa1", $bulk), $limit],
    '1e5 -> 100 (binary 8)' => [$testCount, true, 'a' . str_repeat("\xfc\xa1\xa1\xa1\xa1\xa1", $bulk), $limit],
    '1e5 -> 100 (binary 9)' => [$testCount, true, 'a' . str_repeat("\xed\xa0\x80", $bulk), $limit],
    '1e5 -> 100 (binary X)' => [$testCount, true, 'a' . str_repeat("\xf0\x82\x82\xac", $bulk), $limit],
];

$tests = [
    // Checks that the two alternatives agree with the baseline.
    // NOTE(review): $maxLength is not forwarded here, so only the escaping
    // path is compared, not truncation -- confirm this is intended.
    'consistency' => function ($count, $isBinary, $s, $maxLength) {
        $expected = Dumper::encodeString($s);
        $actualA = Dumper::encodeString2($s);
        $actualB = Dumper::encodeString3($s);

        assert($actualA === $expected);
        assert($actualB === $expected);
    },

    // Timing loops: one per implementation, each calling it $count times.
    'original' => function ($count, $isBinary, $s, $maxLength) {
        while ($count--) {
            Dumper::encodeString($s, $maxLength);
        }
    },
    'pr' => function ($count, $isBinary, $s, $maxLength) {
        while ($count--) {
            Dumper::encodeString2($s, $maxLength);
        }
    },
    'dg' => function ($count, $isBinary, $s, $maxLength) {
        while ($count--) {
            Dumper::encodeString3($s, $maxLength);
        }
    },
];

// Column width for the data set label: longest label plus two spaces.
$padLength = 0;
foreach (array_keys($dataSets) as $label) {
    $padLength = max($padLength, strlen($label));
}
$padLength += 2;

foreach ($tests as $testName => $test) {
    printf("Test %s:\n", $testName);

    foreach ($dataSets as $dataSetName => $arguments) {
        $startedAt = microtime(true);
        $test(...$arguments);
        $elapsed = microtime(true) - $startedAt;

        printf(" %s%5.0f ms\n", str_pad($dataSetName, $padLength), $elapsed * 1e3);
    }

    echo "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment