Skip to content

Instantly share code, notes, and snippets.

@bwoebi
Created June 2, 2015 22:14
Show Gist options
  • Save bwoebi/4fb04b69a733f2e5ed10 to your computer and use it in GitHub Desktop.
Save bwoebi/4fb04b69a733f2e5ed10 to your computer and use it in GitHub Desktop.
static huffman encode/decode (RFC 7541 Appendix B)
<?php
const HPACK_CODE = [
/* 0x00 */ 0x1ff8, 0x7fffd8, 0xfffffe2, 0xfffffe3, 0xfffffe4, 0xfffffe5, 0xfffffe6, 0xfffffe7,
/* 0x08 */ 0xfffffe8, 0xffffea, 0x3ffffffc, 0xfffffe9, 0xfffffea, 0x3ffffffd, 0xfffffeb, 0xfffffec,
/* 0x10 */ 0xfffffed, 0xfffffee, 0xfffffef, 0xffffff0, 0xffffff1, 0xffffff2, 0x3ffffffe, 0xffffff3,
/* 0x18 */ 0xffffff4, 0xffffff5, 0xffffff6, 0xffffff7, 0xffffff8, 0xffffff9, 0xffffffa, 0xffffffb,
/* 0x20 */ 0x14, 0x3f8, 0x3f9, 0xffa, 0x1ff9, 0x15, 0xf8, 0x7fa,
/* 0x28 */ 0x3fa, 0x3fb, 0xf9, 0x7fb, 0xfa, 0x16, 0x17, 0x18,
/* 0x30 */ 0x0, 0x1, 0x2, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
/* 0x38 */ 0x1e, 0x1f, 0x5c, 0xfb, 0x7ffc, 0x20, 0xffb, 0x3fc,
/* 0x40 */ 0x1ffa, 0x21, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
/* 0x48 */ 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a,
/* 0x50 */ 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72,
/* 0x58 */ 0xfc, 0x73, 0xfd, 0x1ffb, 0x7fff0, 0x1ffc, 0x3ffc, 0x22,
/* 0x60 */ 0x7ffd, 0x3, 0x23, 0x4, 0x24, 0x5, 0x25, 0x26,
/* 0x68 */ 0x27, 0x6, 0x74, 0x75, 0x28, 0x29, 0x2a, 0x7,
/* 0x70 */ 0x2b, 0x76, 0x2c, 0x8, 0x9, 0x2d, 0x77, 0x78,
/* 0x78 */ 0x79, 0x7a, 0x7b, 0x7ffe, 0x7fc, 0x3ffd, 0x1ffd, 0xffffffc,
/* 0x80 */ 0xfffe6, 0x3fffd2, 0xfffe7, 0xfffe8, 0x3fffd3, 0x3fffd4, 0x3fffd5, 0x7fffd9,
/* 0x88 */ 0x3fffd6, 0x7fffda, 0x7fffdb, 0x7fffdc, 0x7fffdd, 0x7fffde, 0xffffeb, 0x7fffdf,
/* 0x90 */ 0xffffec, 0xffffed, 0x3fffd7, 0x7fffe0, 0xffffee, 0x7fffe1, 0x7fffe2, 0x7fffe3,
/* 0x98 */ 0x7fffe4, 0x1fffdc, 0x3fffd8, 0x7fffe5, 0x3fffd9, 0x7fffe6, 0x7fffe7, 0xffffef,
/* 0xA0 */ 0x3fffda, 0x1fffdd, 0xfffe9, 0x3fffdb, 0x3fffdc, 0x7fffe8, 0x7fffe9, 0x1fffde,
/* 0xA8 */ 0x7fffea, 0x3fffdd, 0x3fffde, 0xfffff0, 0x1fffdf, 0x3fffdf, 0x7fffeb, 0x7fffec,
/* 0xB0 */ 0x1fffe0, 0x1fffe1, 0x3fffe0, 0x1fffe2, 0x7fffed, 0x3fffe1, 0x7fffee, 0x7fffef,
/* 0xB8 */ 0xfffea, 0x3fffe2, 0x3fffe3, 0x3fffe4, 0x7ffff0, 0x3fffe5, 0x3fffe6, 0x7ffff1,
/* 0xC0 */ 0x3ffffe0, 0x3ffffe1, 0xfffeb, 0x7fff1, 0x3fffe7, 0x7ffff2, 0x3fffe8, 0x1ffffec,
/* 0xC8 */ 0x3ffffe2, 0x3ffffe3, 0x3ffffe4, 0x7ffffde, 0x7ffffdf, 0x3ffffe5, 0xfffff1, 0x1ffffed,
/* 0xD0 */ 0x7fff2, 0x1fffe3, 0x3ffffe6, 0x7ffffe0, 0x7ffffe1, 0x3ffffe7, 0x7ffffe2, 0xfffff2,
/* 0xD8 */ 0x1fffe4, 0x1fffe5, 0x3ffffe8, 0x3ffffe9, 0xffffffd, 0x7ffffe3, 0x7ffffe4, 0x7ffffe5,
/* 0xE0 */ 0xfffec, 0xfffff3, 0xfffed, 0x1fffe6, 0x3fffe9, 0x1fffe7, 0x1fffe8, 0x7ffff3,
/* 0xE8 */ 0x3fffea, 0x3fffeb, 0x1ffffee, 0x1ffffef, 0xfffff4, 0xfffff5, 0x3ffffea, 0x7ffff4,
/* 0xF0 */ 0x3ffffeb, 0x7ffffe6, 0x3ffffec, 0x3ffffed, 0x7ffffe7, 0x7ffffe8, 0x7ffffe9, 0x7ffffea,
/* 0xF8 */ 0x7ffffeb, 0xffffffe, 0x7ffffec, 0x7ffffed, 0x7ffffee, 0x7ffffef, 0x7fffff0, 0x3ffffee,
/* end! */ 0x3fffffff
];
const HPACK_CODE_LENGTHS = [
/* 0x00 */ 13, 23, 28, 28, 28, 28, 28, 28,
/* 0x08 */ 28, 24, 30, 28, 28, 30, 28, 28,
/* 0x10 */ 28, 28, 28, 28, 28, 28, 30, 28,
/* 0x18 */ 28, 28, 28, 28, 28, 28, 28, 28,
/* 0x20 */ 6, 10, 10, 12, 13, 6, 8, 11,
/* 0x28 */ 10, 10, 8, 11, 8, 6, 6, 6,
/* 0x30 */ 5, 5, 5, 6, 6, 6, 6, 6,
/* 0x38 */ 6, 6, 7, 8, 15, 6, 12, 10,
/* 0x40 */ 13, 6, 7, 7, 7, 7, 7, 7,
/* 0x48 */ 7, 7, 7, 7, 7, 7, 7, 7,
/* 0x50 */ 7, 7, 7, 7, 7, 7, 7, 7,
/* 0x58 */ 8, 7, 8, 13, 19, 13, 14, 6,
/* 0x60 */ 15, 5, 6, 5, 6, 5, 6, 6,
/* 0x68 */ 6, 5, 7, 7, 6, 6, 6, 5,
/* 0x70 */ 6, 7, 6, 5, 5, 6, 7, 7,
/* 0x78 */ 7, 7, 7, 15, 11, 14, 13, 28,
/* 0x80 */ 20, 22, 20, 20, 22, 22, 22, 23,
/* 0x88 */ 22, 23, 23, 23, 23, 23, 24, 23,
/* 0x90 */ 24, 24, 22, 23, 24, 23, 23, 23,
/* 0x98 */ 23, 21, 22, 23, 22, 23, 23, 24,
/* 0xA0 */ 22, 21, 20, 22, 22, 23, 23, 21,
/* 0xA8 */ 23, 22, 22, 24, 21, 22, 23, 23,
/* 0xB0 */ 21, 21, 22, 21, 23, 22, 23, 23,
/* 0xB8 */ 20, 22, 22, 22, 23, 22, 22, 23,
/* 0xC0 */ 26, 26, 20, 19, 22, 23, 22, 25,
/* 0xC8 */ 26, 26, 26, 27, 27, 26, 24, 25,
/* 0xD0 */ 19, 21, 26, 27, 27, 26, 27, 24,
/* 0xD8 */ 21, 21, 26, 26, 28, 27, 27, 27,
/* 0xE0 */ 20, 24, 20, 21, 22, 21, 21, 23,
/* 0xE8 */ 22, 22, 25, 25, 24, 24, 26, 23,
/* 0xF0 */ 26, 27, 26, 26, 27, 27, 27, 27,
/* 0xF8 */ 27, 28, 27, 27, 27, 27, 27, 26,
/* end! */ 30
];
// (micro-)optimized decode
$hpack_huffman_lookup = (function() { // do not pollute global space unnecessarily
$encodingAccess = [];
$terminals = [];
$lookup = [];
var_dump(microtime(1));
foreach (HPACK_CODE as $chr => $bits) {
$len = HPACK_CODE_LENGTHS[$chr];
for ($bit = 0; $bit < 8; $bit++) {
$offlen = $len + $bit;
$next = &$encodingAccess[$bit];
for ($byte = (int) (($offlen - 1) / 8); $byte > 0; $byte--) {
$cur = str_pad(decbin(($bits >> ($byte * 8 - (0x30 - $offlen) % 8)) & 0xFF), 8, "0", STR_PAD_LEFT);
if (isset($next[$cur]) && $next[$cur][0] != $encodingAccess[0]) {
$next = &$next[$cur][0];
} else {
$tmp = &$next;
unset($next);
$tmp[$cur] = [&$next, null];
}
}
$key = str_pad(decbin($bits & ((1 << ((($offlen - 1) % 8) + 1)) - 1)), ((($offlen - 1) % 8) + 1), "0", STR_PAD_LEFT);
$next[$key] = [null, $chr > 0xFF ? "" : chr($chr)];
if ($offlen % 8) {
$terminals[$offlen % 8][] = [$key, &$next];
} else {
$next[$key][0] = &$encodingAccess[0];
}
if (isset($encodingAccess[0][1]) && empty($x)) var_dump($chr, $bit, $x=true);
}
}
for ($off = 7; $off > 0; $off--) {
foreach ($terminals[$off] as &$terminal) {
$key = $terminal[0];
$next = &$terminal[1];
if ($next[$key][0] === null) {
foreach ($encodingAccess[$off] as $chr => &$cur) {
$next[str_pad($key, 8, "0", STR_PAD_RIGHT) | $chr] = [&$cur[0], $next[$key][1] != "" ? $next[$key][1] . $cur[1] : ""];
}
unset($next[$key]);
}
}
}
($fn = function (&$arr) use (&$fn, &$encodingAccess) {
ksort($arr);
foreach ($arr as $k => $v) {
$arr[chr(bindec($k))] = $v;
unset($arr[$k]);
}
foreach ($arr as $k => $v) {
if (\strlen(key($v[0])) == 8) { // prevent infinite recursion
$fn($v[0]);
}
}
})($encodingAccess[0]);
return $encodingAccess[0];
})();
function hpack_huffman_decode($input) {
global $hpack_huffman_lookup;
$lookup = $hpack_huffman_lookup;
$len = \strlen($input);
$out = str_repeat("\0", $len / 5 * 8 + 1); // max length
for ($off = $i = 0; $i < $len; $i++) {
list($lookup, $chr) = $lookup[$input[$i]];
if ($chr != null) {
$out[$off++] = $chr;
if (\strlen($chr) != 2) {
continue;
} else {
$out[$off++] = $chr[1];
continue;
}
}
if ($chr === "") {
return null;
}
}
return substr($out, 0, $off);
}
$hpack_huffman_codes = (function() {
$lookup = [];
for ($chr = 0; $chr <= 0xFF; $chr++) {
$bits = HPACK_CODE[$chr];
$len = HPACK_CODE_LENGTHS[$chr];
for ($bit = 0; $bit < 8; $bit++) {
$bytes = floor(($len + $bit - 1) / 8);
for ($byte = $bytes; $byte >= 0; $byte--) {
$lookup[$bit][chr($chr)][] = chr($byte ? $bits >> ($len - ($bytes - $byte + 1) * 8 + $bit) : ($bits << ((0x30 - $len - $bit) % 8)));
}
}
}
return $lookup;
})();
$hpack_huffman_lens = (function() {
$lens = [];
for ($chr = 0; $chr <= 0xFF; $chr++) {
$lens[chr($chr)] = HPACK_CODE_LENGTHS[$chr];
}
return $lens;
})();
function hpack_huffman_encode($input) {
global $hpack_huffman_codes, $hpack_huffman_lens;
$len = \strlen($input);
$out = str_repeat("\0", $len * 5 + 1); // max length
for ($bitcount = $i = 0; $i < $len; $i++) {
$chr = $input[$i];
$byte = $bitcount >> 3;
foreach ($hpack_huffman_codes[$bitcount % 8][$chr] as $bits) {
$out[$byte] = $out[$byte] | $bits;
$byte++;
// can we have $out[$byte++] |= $bits please?
}
$bitcount += $hpack_huffman_lens[$chr];
}
$e = (int) ceil($bitcount / 8);
if ($e != $bitcount / 8) {
$out[$e - 1] = $out[$e - 1] | chr(0xFF >> $bitcount % 8);
}
return substr($out, 0, $e);
}
$data = file_get_contents("/dev/urandom", null, null, null, 10 << 20);
$t = microtime(1);
$enc = hpack_huffman_encode($data);
$nt = microtime(1);
print "Encode ".strlen($data)." bytes into ".strlen($enc)." bytes in ".($nt - $t)." seconds\n";
$t = microtime(1);
$x = hpack_huffman_decode($enc);
$nt = microtime(1);
print "Decoding finished in ".($nt - $t)." seconds\n";
assert($data == $x);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment