Skip to content

Instantly share code, notes, and snippets.

@jesseschalken
Created February 5, 2017 13:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jesseschalken/5b5798e87d049c7019af2a840b9b249d to your computer and use it in GitHub Desktop.
Save jesseschalken/5b5798e87d049c7019af2a840b9b249d to your computer and use it in GitHub Desktop.
ord() and chr() for UTF-8
<?php
/**
 * Decodes the UTF-8 encoded code point whose lead byte is at byte offset $i of $s.
 *
 * Only well-formed UTF-8 is accepted. Truncated sequences, stray continuation
 * bytes, overlong encodings, UTF-16 surrogates (U+D800..U+DFFF) and values
 * above U+10FFFF are rejected instead of being decoded.
 *
 * @param string $s UTF-8 encoded bytes.
 * @param int    $i Byte offset of the lead byte. A negative offset counts back
 *                  from the end of $s.
 *
 * @return int The decoded Unicode code point.
 *
 * @throws \OutOfBoundsException     If $i does not address a byte of $s.
 * @throws \UnexpectedValueException If the bytes at $i are not well-formed UTF-8.
 */
function utf8_ord(string $s, int $i = 0): int {
    $len = \strlen($s);

    if ($i < 0) {
        // Resolve a negative offset once, so the continuation bytes are read
        // forwards from the lead byte rather than wrapping around offset 0.
        $i += $len;
    }
    if ($i < 0 || $i >= $len) {
        // Without this guard an out-of-range read yields '' and ord('') is 0,
        // which would be returned as if it were a decoded U+0000.
        throw new \OutOfBoundsException('Offset is outside the string');
    }

    $o = \ord($s[$i]);

    if ($o < 0x80) {
        // 0xxxxxxx
        return $o;
    }

    // $l   = number of continuation bytes that must follow the lead byte
    // $r   = payload bits taken from the lead byte
    // $min = smallest code point that legitimately needs this sequence length
    if (($o & 0xE0) === 0xC0) {
        // 110xxxxx 10xxxxxx
        $l = 1;
        $r = $o & 0x1F;
        $min = 0x80;
    } elseif (($o & 0xF0) === 0xE0) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        $l = 2;
        $r = $o & 0x0F;
        $min = 0x800;
    } elseif (($o & 0xF8) === 0xF0) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $l = 3;
        $r = $o & 0x07;
        $min = 0x10000;
    } else {
        // A continuation byte (10xxxxxx) or 11111xxx cannot start a sequence.
        throw new \UnexpectedValueException('Invalid UTF-8 lead byte');
    }

    if ($i + $l >= $len) {
        throw new \UnexpectedValueException('Truncated UTF-8 sequence');
    }

    for ($k = 1; $k <= $l; $k++) {
        $o = \ord($s[$i + $k]);
        if (($o & 0xC0) !== 0x80) {
            throw new \UnexpectedValueException('Invalid UTF-8 continuation byte');
        }
        // 10xxxxxx: append the six payload bits.
        $r = ($r << 6) | ($o & 0x3F);
    }

    if ($r < $min) {
        // The same code point has a shorter encoding; accepting the long form
        // would let callers smuggle characters past byte-level filters.
        throw new \UnexpectedValueException('Overlong UTF-8 encoding');
    }
    if ($r >= 0xD800 && $r <= 0xDFFF) {
        throw new \UnexpectedValueException('UTF-16 surrogate encoded in UTF-8');
    }
    if ($r > 0x10FFFF) {
        throw new \UnexpectedValueException('Code point is above U+10FFFF');
    }

    return $r;
}
/**
 * Encodes a Unicode code point as a UTF-8 byte string.
 *
 * Only Unicode scalar values are encoded: 0..0x10FFFF excluding the UTF-16
 * surrogate range 0xD800..0xDFFF, so the result is always well-formed UTF-8.
 *
 * @param int $n The code point to encode.
 *
 * @return string The 1 to 4 byte UTF-8 encoding of $n.
 *
 * @throws \OutOfRangeException If $n is negative, above 0x10FFFF, or a surrogate.
 */
function utf8_chr(int $n): string {
    if ($n < 0 || $n > 0x10FFFF) {
        // 21 payload bits could hold up to 0x1FFFFF, but Unicode ends at 0x10FFFF.
        throw new \OutOfRangeException('Code point is outside 0..0x10FFFF');
    }
    if ($n >= 0xD800 && $n <= 0xDFFF) {
        throw new \OutOfRangeException('UTF-16 surrogates cannot be encoded as UTF-8');
    }

    if ($n < 0x80) {
        // 7 bits
        // 0xxxxxxx
        return \chr($n);
    }

    // $l = number of continuation bytes, $p = marker bits of the lead byte
    if ($n < 0x800) {
        // 11 bits
        // 110xxxxx 10xxxxxx
        $l = 1;
        $p = 0xC0;
    } elseif ($n < 0x10000) {
        // 16 bits
        // 1110xxxx 10xxxxxx 10xxxxxx
        $l = 2;
        $p = 0xE0;
    } else {
        // 21 bits
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $l = 3;
        $p = 0xF0;
    }

    // Emit continuation bytes from least to most significant, prepending each
    // one so the final string is in big-endian order.
    $s = '';
    for (; $l > 0; $l--) {
        // 6 bits
        // 10xxxxxx
        $s = \chr(0x80 | ($n & 0x3F)) . $s;
        $n >>= 6;
    }

    // The bits left in $n fit the payload of the lead byte.
    return \chr($p | $n) . $s;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment