Skip to content

Instantly share code, notes, and snippets.

@msegu
Created July 11, 2018 20:43
Show Gist options
  • Save msegu/de556cdd32eb58430871b06145168024 to your computer and use it in GitHub Desktop.
Save msegu/de556cdd32eb58430871b06145168024 to your computer and use it in GitHub Desktop.
PHP gzdecode() replacement/extension
<?php
/** function 'gzdecodeM'
* Decodes a .gz payload made of one or more concatenated gzip members (RFC 1952).
* Every member is inflated (even unselected ones, to validate the stream and to find
* where the next member starts); only selected members are appended to the result.
* Member boundaries are taken from the number of bytes zlib actually consumed, not
* from searching for the gzip magic bytes, which can also occur inside compressed data.
* Requires PHP >= 7.2 with the zlib extension (inflate_get_read_len()).
*
* @param data string .GZ data
* @param gz array (optional) Returned array of file sections properties; reset on every call.
*        Per section: method, flags, mtime, os, extra (only if present), filename, comment,
*        size_packed, CRC32 (8 upper-case hex digits), size, and either CRC32_OK or not_selected
* @param error string (optional) Returned error (if any); reset to '' on every call
* @param maxlength int (optional) Max length of uncompressed data per section (null/0 = unlimited)
* @param sects string (optional) Sections to uncompress, e.g. '0-3, 5, 8-p100000, 100-' (where: '0' - section number, 'p0' - file .gz position in Bytes, 'x-' - from section x to the end of file)
*        A range 'pA-pB' selects sections lying entirely inside byte positions A..B;
*        a single 'pA' selects the section that contains byte position A.
* Note: sections are extracted once and in the order they are in the .gz file, not how they are listed
* @return Uncompressed data (if OK; '' when no section was selected),
*         null when data is not GZIP / uses reserved flag bits / is too short, false on any other error
*/
function gzdecodeM($data, &$gz, &$error='', $maxlength=null, $sects='') {
    $gz = array();
    $error = '';
    $data = (string) $data;
    $len0 = strlen($data);
    $limit = (int) $maxlength;          // null means "no limit", same as 0
    $ranges = _gzdecodeM_parse_sections($sects);
    $wholedata = '';
    $section = 0;
    $member = 0;                        // gzip member position in $data

    if (!function_exists('inflate_get_read_len')) {
        $error = "zlib incremental inflate API (PHP >= 7.2) is required.";
        return false;
    }

    do {
        $gz[$section] = array();
        $len = $len0 - $member;         // bytes left from the start of this member
        if ($len < 18 || substr($data, $member, 2) !== "\x1f\x8b") {
            $error = "Not in GZIP format.";
            return null; // Not GZIP format (See RFC 1952)
        }
        $method = $gz[$section]['method'] = ord($data[$member + 2]); // Compression method
        if ($method !== 8) {
            // 8 (deflate) is currently the only supported compression method
            $error = "Unknown compression method.";
            return false;
        }
        $flags = $gz[$section]['flags'] = ord($data[$member + 3]); // Flags
        // Parentheses are required: '!=' binds tighter than '&' in PHP.
        if (($flags & 31) !== $flags) {
            $error = "Reserved bits not allowed.";
            return null;
        }
        // NOTE: mtime may be negative on 32-bit builds (PHP integer limitations)
        $mtime = unpack("V", substr($data, $member + 4, 4));
        $gz[$section]['mtime'] = $mtime[1];
        $gz[$section]['os'] = ord($data[$member + 9]);
        $headerlen = 10;

        if ($flags & 4) {
            // FEXTRA: 2-byte length prefixed EXTRA data in header
            if ($len - $headerlen - 2 < 8) {
                $error = "Invalid header: truncated extra field.";
                return false;
            }
            $extralen = unpack("v", substr($data, $member + $headerlen, 2));
            $extralen = $extralen[1];
            if ($len - $headerlen - 2 - $extralen < 8) {
                $error = "Invalid header: truncated extra field.";
                return false;
            }
            $gz[$section]['extra'] = substr($data, $member + $headerlen + 2, $extralen);
            $headerlen += 2 + $extralen;
        }

        // FNAME (bit 8) then FCOMMENT (bit 16): NUL-terminated strings, in that order.
        foreach (array(8 => 'filename', 16 => 'comment') as $bit => $field) {
            $gz[$section][$field] = "";
            if ($flags & $bit) {
                $start = $member + $headerlen;
                $nul = ($start < $len0) ? strpos($data, "\0", $start) : false;
                // At least the 8-byte trailer must follow the terminator.
                if ($nul === false || $len0 - $nul - 1 < 8) {
                    $error = "Invalid header: unterminated $field field.";
                    return false;
                }
                $gz[$section][$field] = substr($data, $start, $nul - $start);
                $headerlen = $nul - $member + 1;
            }
        }

        if ($flags & 2) {
            // FHCRC: 2-bytes (lowest order) of CRC32 on header present
            if ($len - $headerlen - 2 < 8) {
                $error = "Invalid header: truncated header checksum.";
                return false;
            }
            $calccrc = crc32(substr($data, $member, $headerlen)) & 0xffff;
            $headercrc = unpack("v", substr($data, $member + $headerlen, 2));
            if ($headercrc[1] !== $calccrc) {
                $error = "Header checksum failed.";
                return false; // Bad header CRC
            }
            $headerlen += 2;
        }

        // Compressed blocks extraction
        $bodystart = $member + $headerlen;
        if ($len0 - $bodystart - 8 < 1) {
            $error = "Truncated GZIP member.";
            return null;
        }
        // Note: decompress even when the section is not selected - this proves it is a
        // proper compressed section and tells us where the next member begins.
        $packed = 0;
        $uncomprssd = _gzdecodeM_inflate($data, $bodystart, $limit, $packed);
        if ($uncomprssd === false || $bodystart + $packed + 8 > $len0) {
            $error = "Compressed data failed or decompression error.";
            return false;
        }
        $gz[$section]['size_packed'] = $packed;

        // GZIP FOOTER: CRC32 and ISIZE, both little-endian 32-bit
        $footer = unpack("Vcrc/Vsize", substr($data, $bodystart + $packed, 8));
        $datacrc = sprintf('%u', $footer['crc']);
        $gz[$section]['CRC32'] = sprintf('%08X', $footer['crc']);
        $isize = $gz[$section]['size'] = $footer['size'];

        $member0 = $member;
        $member = $bodystart + $packed + 8;  // first byte after this member

        if ($ranges === null || _gzdecodeM_is_selected($ranges, $section, $member0, $member - 1)) {
            $wholedata .= $uncomprssd;
            // Verify CRC32 and length (ISIZE is the length modulo 2^32)
            $crcOK = (sprintf('%u', crc32($uncomprssd)) === $datacrc);
            $gz[$section]['CRC32_OK'] = $crcOK;
            $size = strlen($uncomprssd);
            if (PHP_INT_SIZE >= 8) {
                $size &= 0xFFFFFFFF;
            }
            $lenOK = ($isize == $size);
            if (!$lenOK || !$crcOK) {
                $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
                return false;
            }
        } else {
            $gz[$section]['not_selected'] = 1;
        }
        $section++;
    } while ($member < $len0);

    return $wholedata;
}

/** Inflates one raw deflate stream starting at $offset; sets $consumed to the compressed length. Returns false on error. */
function _gzdecodeM_inflate($data, $offset, $maxlength, &$consumed) {
    $consumed = 0;
    $ctx = inflate_init(ZLIB_ENCODING_RAW);
    if ($ctx === false) {
        return false;
    }
    $out = '';
    $total = strlen($data);
    // Feed the input piecewise so an oversized result can be rejected early.
    for ($pos = $offset; $pos < $total; $pos += 8192) {
        $piece = inflate_add($ctx, substr($data, $pos, 8192), ZLIB_SYNC_FLUSH);
        if ($piece === false) {
            return false;
        }
        $out .= $piece;
        if ($maxlength > 0 && strlen($out) > $maxlength) {
            return false;
        }
        if (inflate_get_status($ctx) === ZLIB_STREAM_END) {
            // Bytes after the end of the deflate stream are left untouched by zlib.
            $consumed = inflate_get_read_len($ctx);
            return $out;
        }
    }
    return false; // input ended before the deflate stream did
}

/** Parses the $sects selector into a list of ranges; returns null when no selector was given. */
function _gzdecodeM_parse_sections($sects) {
    // Compare against '' explicitly: the selector '0' is valid but falsy in PHP.
    if ($sects === null || trim((string) $sects) === '') {
        return null;
    }
    $ranges = array();
    foreach (explode(',', (string) $sects) as $spec) {
        if (trim($spec) === '') {
            continue;
        }
        $parts = explode('-', $spec);
        $from = _gzdecodeM_parse_bound($parts[0]);
        $to = isset($parts[1]) ? _gzdecodeM_parse_bound($parts[1]) : null; // null = single item
        if ($from === false || $to === false) {
            continue; // malformed entries select nothing
        }
        $ranges[] = array('from' => $from, 'to' => $to);
    }
    return $ranges;
}

/** Parses one range bound into array(kind, value): kind '' = open, 'n' = section number, 'p' = byte position; false if malformed. */
function _gzdecodeM_parse_bound($token) {
    $token = trim($token);
    if ($token === '') {
        return array('', 0);
    }
    if (!preg_match('/^(p?)(\d+)$/i', $token, $m)) {
        return false;
    }
    return array($m[1] === '' ? 'n' : 'p', (int) $m[2]);
}

/** Tells whether section number $section, occupying bytes $first..$last of the file, matches any parsed range. */
function _gzdecodeM_is_selected($ranges, $section, $first, $last) {
    foreach ($ranges as $range) {
        list($fromKind, $fromVal) = $range['from'];
        if ($range['to'] === null) {
            // Single item: exact section number, or the section containing a byte position.
            if ($fromKind === 'n' && $section === $fromVal) {
                return true;
            }
            if ($fromKind === 'p' && $first <= $fromVal && $fromVal <= $last) {
                return true;
            }
            continue;
        }
        list($toKind, $toVal) = $range['to'];
        $fromOk = ($fromKind === '')
            || ($fromKind === 'n' && $section >= $fromVal)
            || ($fromKind === 'p' && $first >= $fromVal);
        $toOk = ($toKind === '')
            || ($toKind === 'n' && $section <= $toVal)
            || ($toKind === 'p' && $last <= $toVal);
        if ($fromOk && $toOk) {
            return true;
        }
    }
    return false;
}
// expanded from http://php.net/manual/en/function.gzdecode.php#82930
?>
@madler
Copy link

madler commented Jun 1, 2022

Searching for gzip headers is neither reliable nor necessary. You can instead use inflate_init(), inflate_add(), and inflate_get_read_len(). The latter will tell you how many bytes of the compressed data were consumed to inflate the last gzip member. You start the next inflate there.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment