Skip to content

Instantly share code, notes, and snippets.

@chobie
Created September 22, 2013 11:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save chobie/6659137 to your computer and use it in GitHub Desktop.
Save chobie/6659137 to your computer and use it in GitHub Desktop.
Pure PHP GZip Deflate implementations (not maintained)
<?php
/**
* Deflate functions
*
* originally written by Takahiko Kawasaki.
* porting by Shuhei Tanuma.
*
* @see http://darutk-oboegaki.blogspot.jp/2011/09/gzip-decompression-in-c.html
*/
class Zlib_Huffman
{
protected $min_code_length;
protected $max_code_length;
protected $biggest_code_values_from_code_length = array();
protected $symbols_from_code_value = array();
public function __construct($code_lengths_from_symbol = array())
{
$tmp = array();
foreach ($code_lengths_from_symbol as $value) {
if (0 < $value) {
$tmp[] = $value;
}
}
if (count($tmp)) {
$min_code_length = min($tmp);
} else {
$min_code_length = 0;
}
$max_code_length = max($code_lengths_from_symbol);
$counts_from_code_length = array_fill(0, $max_code_length + 1, 0);
$count = count($code_lengths_from_symbol);
for ($symbol = 0; $symbol < $count; ++$symbol) {
$code_length = $code_lengths_from_symbol[$symbol];
++$counts_from_code_length[$code_length];
}
$smallest_code_value = 0;
$biggest_code_value = 0;
$counts_from_code_length[0] = 0;
$code_values_from_code_length = array();
$biggest_code_values_from_code_length = array_fill(0, $max_code_length+1, -1);
$count = count($counts_from_code_length);
for ($code_length = 1; $code_length < $count; ++$code_length) {
$previous_count = $counts_from_code_length[$code_length - 1];
$smallest_code_value = ($smallest_code_value + $previous_count) << 1;
$code_values_from_code_length[$code_length] = $smallest_code_value;
$biggest_code_value = $smallest_code_value + $counts_from_code_length[$code_length] - 1;
$biggest_code_values_from_code_length[$code_length] = $biggest_code_value;
}
$symbols_from_code_value = array_fill(0, $max_code_length+1, 0);
$count = count($code_lengths_from_symbol);
for ($symbol = 0; $symbol < $count; ++$symbol) {
$code_length = $code_lengths_from_symbol[$symbol];
if ($code_length != 0) {
$code_value = $code_values_from_code_length[$code_length]++;
$symbols_from_code_value[$code_value] = $symbol;
}
}
$this->min_code_length = $min_code_length;
$this->max_code_length = $max_code_length;
$this->biggest_code_values_from_code_length = $biggest_code_values_from_code_length;
$this->symbols_from_code_value = $symbols_from_code_value;
}
public function readSymbol($input, &$bit_index)
{
for ($code_length = $this->min_code_length;
$code_length <= $this->max_code_length;
++$code_length) {
$biggest_code_value = $this->biggest_code_values_from_code_length[$code_length];
if ($biggest_code_value < 0) {
continue;
}
$code_value = $this->getHuffmanBits($input, $bit_index, $code_length);
if ($biggest_code_value < $code_value) {
continue;
}
$symbol = $this->symbols_from_code_value[$code_value];
$bit_index += $code_length;
return $symbol;
}
throw new Exception(sprintf("Bad code at index %d", $bit_index));
}
public function getHuffmanBits($input, &$bit_index, $n_bits)
{
$number = 0;
$weight = 1;
for ($i = $n_bits - 1; 0 <= $i; --$i, $weight *= 2) {
if ($this->getBit($input, $bit_index + $i)) {
$number += $weight;
}
}
return $number;
}
public function getBit($input, $bit_index)
{
$index = (int)($bit_index / 8);
$shift = $bit_index % 8;
return (ord($input[$index]) & (1 << $shift)) != 0;
}
}
class Zlib_Defalte
{
protected $indices_from_code_length_order = array(16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15);
protected $fixed_literal_length_huffman;
protected $fixed_distance_huffman;
public function __construct()
{
$this->fixed_literal_length_huffman = $this->buildFixedLiteralLengthHuffman();
$this->fixed_distance_huffman = $this->buildFixedDistanceHuffman();
}
public function buildFixedLiteralLengthHuffman()
{
$code_length = array();
for ($symbol = 0; $symbol < 144; ++$symbol) {
$code_length[$symbol] = 8;
}
for (;$symbol < 256; ++$symbol) {
$code_length[$symbol] = 9;
}
for (;$symbol < 280; ++$symbol) {
$code_length[$symbol] = 7;
}
for (;$symbol < 288; ++$symbol) {
$code_length[$symbol] = 8;
}
return new Zlib_Huffman($code_length);
}
public function buildFixedDistanceHuffman()
{
$code_length = array();
for ($symbol = 0; $symbol < 32; ++$symbol) {
$code_length[$symbol] = 5;
}
return new Zlib_Huffman($code_length);
}
public function inflate($buffer, $offset)
{
$bit_offset = $offset * 8;
$output = "";
while ($this->inflateBlock($buffer, $bit_offset, $output));
return $output;
}
public function inflateBlock($buffer, &$bit_index, &$result)
{
$index = $bit_index;
$last = $this->readBit($buffer, $index);
$type = $this->readBits($buffer, $index, 2);
switch ($type) {
case 0:
$this->inflatePlainBlock($buffer, $index, $result);
break;
case 1:
$this->inflateFixedBlock($buffer, $index, $result);
break;
case 2:
$this->inflateDynamicBlock($buffer, $index, $result);
break;
default:
throw new Exception(sprintf("Bad compression type at bit index %d", $index));
}
$bit_index = $index;
if ($last) {
return false;
}
return true;
}
public function inflateDynamicBlock($buffer, &$bit_index, &$output)
{
$hlit = $this->readBits($buffer, $bit_index, 5) + 257;
$hdist = $this->readBits($buffer, $bit_index, 5) + 1;
$hclen = $this->readBits($buffer, $bit_index, 4) + 4;
$code_lengths_from_code_length_value = array_fill(0, 19, 0);
for ($i = 0; $i < $hclen; ++$i) {
$code_length_of_code_length_value = $this->readBits($buffer, $bit_index, 3);
$index = $this->codeLengthOrderToIndex($i);
$code_lengths_from_code_length_value[$index] = $code_length_of_code_length_value;
}
$code_length_huffman = new Zlib_Huffman($code_lengths_from_code_length_value);
$code_lengths_from_literal_length_code = array_fill(0, $hlit, 0);
$this->readCodeLengths($buffer, $bit_index, $code_lengths_from_literal_length_code, $code_length_huffman);
$literal_length_huffman = new Zlib_Huffman($code_lengths_from_literal_length_code);
$code_lengths_from_distance_code = array_fill(0, $hdist, 0);
$this->readCodeLengths($buffer, $bit_index, $code_lengths_from_distance_code, $code_length_huffman);
$distance_huffman = new Zlib_Huffman($code_lengths_from_distance_code);
$this->inflateData($buffer, $bit_index, $output, $literal_length_huffman, $distance_huffman);
}
public function readCodeLengths($input, &$bit_index, &$code_lengths, Zlib_Huffman $code_length_huffman)
{
for ($i = 0; $i < count($code_lengths);$i++) {
$code_length = $code_length_huffman->ReadSymbol($input, $bit_index);
if (0 <= $code_length && $code_length <= 15)
{
$code_lengths[$i] = $code_length;
continue;
}
$repeat_count = 0;
switch ($code_length) {
case 16:
$code_length = $code_lengths[$i - 1];
$repeat_count = $this->readBits($input, $bit_index, 2) + 3;
break;
case 17:
$code_length = 0;
$repeat_count = $this->readBits($input, $bit_index, 3) + 3;
break;
case 18:
$code_length = 0;
$repeat_count = $this->readBits($input, $bit_index, 7) + 11;
break;
default:
throw new Exception();
}
for ($j = 0; $j < $repeat_count; ++$j) {
$code_lengths[$i + $j] = $code_length;
}
$i += $repeat_count - 1;
}
}
public function codeLengthOrderToIndex($order)
{
return $this->indices_from_code_length_order[$order];
}
public function inflatePlainBlock($buffer, &$bit_index, &$output)
{
$bit_index = ($bit_index + 7) & ~7;
$index = (int)($bit_index / 8);
$len = ord($buffer[$index]) + ord($buffer[$index + 1]) * 256;
$index += 4;
$output .= substr($buffer, $index, $len);
$bit_index = ($index + $len) * 8;
}
public function inflateFixedBlock($buffer, &$bit_index, &$output)
{
$this->inflateData($buffer,
$bit_index,
$output,
$this->fixed_literal_length_huffman,
$this->fixed_distance_huffman
);
}
public function inflateData($buffer, &$bit_index, &$output, Zlib_Huffman $literal_length_huffman, Zlib_Huffman $distance_huffman)
{
$tmp = "";
$index = $bit_index;
while (true) {
$literal_length = $literal_length_huffman->readSymbol($buffer, $index);
if ($literal_length == 256) {
$output .= $tmp;
$bit_index = $index;
break;
}
if (0 <= $literal_length && $literal_length <= 255) {
$tmp .= chr($literal_length);
continue;
}
$length = $this->readLength($buffer, $index, $literal_length);
$distance = $this->readDistance($buffer, $index, $distance_huffman);
$this->duplicate($length, $distance, $tmp);
}
}
public function duplicate($length, $distance, &$output)
{
$source = $output;
$source_length = strlen($output);
$target = "";
for ($i = 0; $i < $length; $i++) {
$target .= "\0";
}
$initial_position = $source_length - $distance;
$source_index = $initial_position;
for ($target_index = 0; $target_index < $length; ++$target_index, ++$source_index) {
if ($source_length <= $source_index) {
$source_index = $initial_position;
}
$target[$target_index] = $source[$source_index];
}
$output .= $target;
}
public function readLength($buffer, &$bit_index, $literal_length)
{
$base_value = 0;
$n_bits = 0;
switch ($literal_length) {
case 257:
case 258:
case 259:
case 260:
case 261:
case 262:
case 263:
case 264:
return ($literal_length - 254);
case 265: $base_value = 11; $n_bits = 1; break;
case 266: $base_value = 13; $n_bits = 1; break;
case 267: $base_value = 15; $n_bits = 1; break;
case 268: $base_value = 17; $n_bits = 1; break;
case 269: $base_value = 19; $n_bits = 2; break;
case 270: $base_value = 23; $n_bits = 2; break;
case 271: $base_value = 27; $n_bits = 2; break;
case 272: $base_value = 31; $n_bits = 2; break;
case 273: $base_value = 35; $n_bits = 3; break;
case 274: $base_value = 43; $n_bits = 3; break;
case 275: $base_value = 51; $n_bits = 3; break;
case 276: $base_value = 59; $n_bits = 3; break;
case 277: $base_value = 67; $n_bits = 4; break;
case 278: $base_value = 83; $n_bits = 4; break;
case 279: $base_value = 99; $n_bits = 4; break;
case 280: $base_value = 115; $n_bits = 4; break;
case 281: $base_value = 131; $n_bits = 5; break;
case 282: $base_value = 163; $n_bits = 5; break;
case 283: $base_value = 195; $n_bits = 5; break;
case 284: $base_value = 227; $n_bits = 5; break;
case 285: return 258;
default:
throw new Exception($literal_length);
}
$n = $this->readBits($buffer, $bit_index, $n_bits);
return $base_value + $n;
}
public function readDistance($input, &$bit_index, Zlib_Huffman $distance_huffman)
{
$code = $distance_huffman->readSymbol($input, $bit_index);
$base_value = 0;
$n_bits = 0;
switch ($code) {
case 0:
case 1:
case 2:
case 3:
return $code + 1;
case 4: $base_value = 5; $n_bits = 1; break;
case 5: $base_value = 7; $n_bits = 1; break;
case 6: $base_value = 9; $n_bits = 2; break;
case 7: $base_value = 13; $n_bits = 2; break;
case 8: $base_value = 17; $n_bits = 3; break;
case 9: $base_value = 25; $n_bits = 3; break;
case 10: $base_value = 33; $n_bits = 4; break;
case 11: $base_value = 49; $n_bits = 4; break;
case 12: $base_value = 65; $n_bits = 5; break;
case 13: $base_value = 97; $n_bits = 5; break;
case 14: $base_value = 129; $n_bits = 6; break;
case 15: $base_value = 193; $n_bits = 6; break;
case 16: $base_value = 257; $n_bits = 7; break;
case 17: $base_value = 385; $n_bits = 7; break;
case 18: $base_value = 513; $n_bits = 8; break;
case 19: $base_value = 769; $n_bits = 8; break;
case 20: $base_value = 1025; $n_bits = 9; break;
case 21: $base_value = 1537; $n_bits = 9; break;
case 22: $base_value = 2049; $n_bits = 10; break;
case 23: $base_value = 3073; $n_bits = 10; break;
case 24: $base_value = 4097; $n_bits = 11; break;
case 25: $base_value = 6145; $n_bits = 11; break;
case 26: $base_value = 8193; $n_bits = 12; break;
case 27: $base_value = 12289; $n_bits = 12; break;
case 28: $base_value = 16385; $n_bits = 13; break;
case 29: $base_value = 24577; $n_bits = 13; break;
default:
// Distance codes 30-31 will never actually occur
// in the compressed data, the specification says.
throw new Exception("bit index $bit_index, byte offset " . (int)($bit_index / 8));
}
// Read a value to add to the base value.
$n = $this->readBits($input, $bit_index, $n_bits);
return $base_value + $n;
}
public function getBit($buffer, $bit_index)
{
$index = (int)($bit_index / 8);
$shift = $bit_index % 8;
// Return true if the bit indicated by bitIndex is set.
return (ord($buffer[$index]) & (1 << $shift)) != 0;
}
public function getBits($buffer, $bit_index, $nbits)
{
$number = 0;
$weight = 1;
// Convert consecutive bits into a number.
for ($i = 0; $i < $nbits; ++$i, $weight *= 2) {
// getBit() returns true if the bit is set.
if ($this->getBit($buffer, $bit_index + $i)) {
$number += $weight;
}
}
return $number;
}
public function readBit($buffer, &$bit_index)
{
return $this->getBit($buffer, $bit_index++);
}
public function readBits($buffer, &$bit_index, $nbits)
{
$number = $this->getBits($buffer, $bit_index, $nbits);
$bit_index += $nbits;
return $number;
}
}
function getIndex($buf)
{
if (!(ord($buf[0]) == 0x1F && ord($buf[1])) == 0x8B) {
throw new Exception("not gzip format");
}
if (strlen($buf) < 10) {
throw new Exception();
}
$info = unpack("C2magic_header/Ccompression_method/Cflags/lmtime/Cextra_flags/Cos_type", $buf);
$index = 10;
if (($info['flags'] & 0x4) != 0) {
$xlen = ord($buf[$index]) + (ord($buf[$index+1]) * 256);
$index += 2 + $xlen;
}
if (($info['flags'] & 0x08) != 0) {
while (true) {
if (ord($buf[$index++]) == 0) {
break;
}
}
}
if (($info['flags'] & 0x10) != 0) {
while (true) {
if (!isset($buf[$index+1])) {
throw new Exception();
}
if (ord($buf[$index++]) == 0) {
break;
}
}
}
if (($info['flags'] & 0x02) != 0) {
$index += 2;
}
return $index;
}
$buffer = file_get_contents("hoge.gz");
$index = getIndex($buffer);
$gzip = new Zlib_Defalte();
var_dump($gzip->inflate($buffer, $index));
@mlaak
Copy link

mlaak commented Jan 14, 2016

Hi,
Takahiko Kawasaki has notice
"You can use my implementation freely even for commercial purpose ..."

If you are ok with other people using your work, maybe you can write something like that in a comment in code?
I would really like to use your code in a project, but the licensing must be in order.

Anyhow, usefull stuff, thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment