Last active
October 26, 2021 08:34
-
-
Save fox34/2503bc11cfc1f1001a919ac655e48706 to your computer and use it in GitHub Desktop.
Read last chunk of concatenated gzip file in PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
declare(strict_types=1);
/**
 * https://gist.github.com/fox34/2503bc11cfc1f1001a919ac655e48706
 *
 * Extract and decode the last member ("chunk") of a concatenated gzip file.
 *
 * Multiple gzip files can simply be concatenated. Every member starts with the
 * magic number 1f 8b, so the tail of the file is searched backwards for that
 * byte pair. Each hit is handed to gzdecode() together with everything that
 * follows it; the first hit that decodes cleanly is returned. Hits that fail to
 * decode (the magic number may occur by chance inside compressed data) are
 * skipped and the search continues towards the start of the file.
 *
 * The tail is read into memory once, so at most $readLimit + 1 bytes
 * (capped at 1MB of compressed data) are held; uncompressed data may be larger.
 *
 * @param string $file      Path of the (concatenated) gzip file.
 * @param int    $readLimit Maximum number of candidate start offsets to inspect,
 *                          counted backwards from the end of the file.
 *                          Values above 1000000 are capped, negative values count as 0.
 *
 * @return string Decoded content of the last gzip member.
 *
 * @throws \RuntimeException If the file cannot be opened or read, or if no
 *                           decodable member starts within the limit.
 */
function gzfile_get_last_chunk_of_concatenated_file(string $file, int $readLimit = 1000000) : string
{
    $magic = "\x1f\x8b";

    // Hard limit of 1MB. Plain integers keep the value an int under strict_types.
    $readLimit = max(0, min($readLimit, 1000000));

    $fp = fopen($file, 'rb');
    if ($fp === false) {
        throw new \RuntimeException('Could not read file.');
    }

    try {
        $size = fseek($fp, 0, SEEK_END) === 0 ? ftell($fp) : false;
        if ($size === false) {
            throw new \RuntimeException('Could not read file.');
        }

        // Candidate offsets are size-2, size-3, ... ($readLimit of them at most),
        // clamped at the start of the file so the scan can never wrap around.
        $start = max(0, $size - 1 - $readLimit);
        $length = $size - $start;
        $tail = $length > 0 ? stream_get_contents($fp, $length, $start) : '';
    } finally {
        fclose($fp);
    }

    if ($tail === false) {
        throw new \RuntimeException('Could not read file.');
    }

    // $haystack shrinks from the right after every failed candidate;
    // $tail stays intact because decoding always needs the data up to EOF.
    $haystack = $tail;
    while (($pos = strrpos($haystack, $magic)) !== false) {
        // A failed candidate is expected here, so the decoder warning is silenced on purpose.
        $data = @gzdecode(substr($tail, $pos));
        if ($data !== false) {
            return $data;
        }

        // The two magic bytes differ, so matches cannot overlap:
        // the next candidate has to end before $pos.
        $haystack = substr($haystack, 0, $pos);
    }

    throw new \RuntimeException('Valid chunk not found within provided size limit.');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment