Skip to content

Instantly share code, notes, and snippets.

@mormegil-cz
Created May 7, 2014 13:15
Show Gist options
  • Save mormegil-cz/4cb3358aa59fa6f991aa to your computer and use it in GitHub Desktop.
Save mormegil-cz/4cb3358aa59fa6f991aa to your computer and use it in GitHub Desktop.
<?php
/**
 * Simple emulation of ICU sort keys for trivial strings, i.e. strings made up
 * of [A-Za-z0-9] only.
 *
 * See http://userguide.icu-project.org/collation/architecture#TOC-Sort-Keys
 *
 * The returned binary string is laid out as
 *
 *     <primary> 0x01 <secondary> 0x01 <tertiary> 0x00
 *
 * - primary:   one byte per supported character. Digits '0'..'9' map to
 *              0x12, 0x14, ...; letters map to 0x27, 0x29, ... regardless
 *              of case.
 * - secondary: a single byte holding (number of primary bytes + 4).
 * - tertiary:  one 0x8F byte per upper-case letter, followed by a single byte
 *              holding (number of primary bytes + 4 - number of upper-case
 *              letters).
 *
 * Any byte outside [A-Za-z0-9] is skipped and contributes nothing to the key.
 *
 * NOTE(review): chr() wraps its argument modulo 256, so an input with more
 * than 251 supported characters yields a wrapped count byte. Confirm whether
 * inputs that long have to be supported.
 *
 * @param string $str Text to convert; it is processed byte by byte.
 * @return string Binary sort key.
 */
function simple_sort_key($str)
{
    $primary = '';
    $upperCount = 0;

    $total = strlen($str);
    $pos = 0;
    while ($pos < $total)
    {
        $code = ord($str[$pos]);
        $pos++;

        if ($code >= 0x30 && $code <= 0x39)
        {
            // '0'..'9'
            $primary .= chr(0x12 + 2 * ($code - 0x30));
            continue;
        }

        if ($code >= 0x61 && $code <= 0x7A)
        {
            // 'a'..'z'
            $primary .= chr(0x27 + 2 * ($code - 0x61));
            continue;
        }

        if ($code >= 0x41 && $code <= 0x5A)
        {
            // 'A'..'Z': same primary weight as the lower-case letter; the
            // case difference is recorded on the tertiary level only.
            $primary .= chr(0x27 + 2 * ($code - 0x41));
            $upperCount++;
        }

        // Every other byte is unsupported and silently ignored.
    }

    $primaryLength = strlen($primary);

    $secondary = chr($primaryLength + 4);
    $tertiary = str_repeat("\x8F", $upperCount) . chr($primaryLength + 4 - $upperCount);

    return $primary . "\x01" . $secondary . "\x01" . $tertiary . "\x00";
}
/**
 * Renders a binary sort key as human-readable text.
 *
 * Byte 0x01 is shown as ' / ', byte 0x00 is shown as '.', and every other
 * byte is shown as two lower-case hexadecimal digits.
 *
 * @param string $sk Binary sort key.
 * @return string Readable representation of $sk.
 */
function printable_sort_key($sk)
{
    // Bytes that have a dedicated textual form instead of a hex dump.
    $markers = [0x00 => '.', 0x01 => ' / '];

    $pieces = [];
    $length = strlen($sk);
    for ($offset = 0; $offset < $length; $offset++)
    {
        $byte = ord($sk[$offset]);
        $pieces[] = isset($markers[$byte]) ? $markers[$byte] : sprintf('%02x', $byte);
    }

    return implode('', $pieces);
}
// Demo: print the readable sort key of a few sample strings, one per line,
// each terminated by an HTML line break.
$samples = ['a', 'á', 'č', 'b', '1', 'b1', 'b11', 'ab12345'];
foreach ($samples as $sample)
{
    echo $sample . ': ' . printable_sort_key(simple_sort_key($sample)) . "<br />\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment