Skip to content

Instantly share code, notes, and snippets.

@david0
Last active June 27, 2022 15:48
Show Gist options
  • Save david0/c5bdfc68bc48565bb22d to your computer and use it in GitHub Desktop.
Save david0/c5bdfc68bc48565bb22d to your computer and use it in GitHub Desktop.
proof of concept for deterministic/reproducible Zip file checksums
proof of concept to make Zip-file checksums more reliable
related to https://github.com/composer/composer/issues/2540
#!/bin/sh
# Builds a set of sample archives from the same directory, with and without
# zip's -X option, so that their checksums can be compared at the end.

# -p: do not fail when the directory is left over from a previous run.
mkdir -p testdata
# /dev/urandom never blocks, so each dd read delivers a full block.
dd if=/dev/urandom of=testdata/data1 count=100
dd if=/dev/urandom of=testdata/data2 count=100
# -f: stay quiet when there is no archive from a previous run.
rm -f testdata*.zip

# Same content, archived twice with only the access time changed in between.
zip -r testdata1_extra.zip testdata/
sleep 1 && touch -a testdata/* # force change atime
zip -r testdata2_extra.zip testdata/ # same as testdata1_extra.zip, different atime

# Same experiment again, this time passing -X to zip.
zip -r -X testdata3_noextra.zip testdata/
sleep 1 && touch -a testdata/* # force change atime
zip -r -X testdata4_noextra.zip testdata/

# Replace one file with new random data: these archives really differ in content.
dd if=/dev/urandom of=testdata/data2 count=100
zip -r testdata5_corrupted_extra.zip testdata/
zip -r -X testdata5_corrupted_noextra.zip testdata/

shasum testdata*.zip
<?php
// Test script to mask out the "extra" part of the zip file header
/**
 * Computes a SHA-1 over a Zip archive after blanking the parts that are
 * rewritten below: the "extra" fields of the local file headers and of the
 * central directory entries, the local-header offset stored in each central
 * directory entry, and the central directory size/offset stored in the end
 * record. Everything else (headers, file names, comments, payload bytes) is
 * hashed as it appears in the file.
 *
 * The archive is parsed sequentially from the first byte; the payload length
 * of each entry is taken from the compressedSize field of its local header.
 * NOTE(review): archives that store sizes elsewhere (trailing data
 * descriptors, ZIP64) are presumably rejected with an "unexpected signature"
 * error - confirm against the ZIP specification if such input matters.
 */
class ZipHasher
{
    const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;
    const CENTRAL_DIRECTORY_STRUCTURE_HEADER = 0x02014b50;
    const CENTRAL_DIRECTORY_STRUCTURE_END = 0x06054b50;

    // Each format is a '/'-separated list of "<pack code><field name>" pairs,
    // describing the fixed-size part of a record that follows its signature.
    const LOCAL_FILE_HEADER_FORMAT = 'vversion/vgeneral/vcmethod/vlastmodtime/vlastmoddate/Vcrc/VcompressedSize/VuncompressedSize/vfilenameLen/vextraLen';
    const CENTRAL_DIRECTORY_STRUCTURE_FORMAT = 'vcversion/vrequiredVersion/vflag/vcmethod/vlastmodtime/vlastmoddate/Vcrc/VcompressedSize/VuncompressedSize/vfilenameLen/vextraLen/vcommentLen/vdisk/vinternalFileAttributes/VexternalFileAttributes/Voffset';
    // The end record carries two 16-bit entry counts (this disk / total) and a
    // 16-bit comment length; all three must be declared so that size, offset
    // and commentLen line up with the right bytes.
    const END_OF_CENTRAL_DIRECTORY_STRUCTURE_FORMAT = 'vdiskNr/vdiskCdsStart/vdiskEntries/vtotalEntries/Vsize/Voffset/vcommentLen';

    /**
     * Hash a Zip file while ignoring the "extra" sections and the
     * position-dependent fields of the central directory, because we are
     * interested in the content only.
     *
     * @param string $filename path of the archive to hash
     * @return string the SHA-1 (40 hex characters) of the normalised archive
     * @throws RuntimeException when the file cannot be opened, is truncated,
     *                          contains an unknown record signature, or has
     *                          data after the end-of-central-directory record
     */
    public function hash($filename)
    {
        $fp = fopen($filename, 'rb');
        if ($fp === false) {
            throw new RuntimeException('unable to open ' . $filename);
        }

        // Feed the hash incrementally instead of building the whole
        // normalised archive in memory first; the digest is the same.
        $ctx = hash_init('sha1');
        try {
            $finished = false;
            while (!$finished) {
                $structure = $this->unpack($fp, 'Vsignature');
                $signature = $structure['signature'];
                hash_update($ctx, pack('V', $signature));

                if ($signature === self::LOCAL_FILE_HEADER_SIGNATURE) {
                    $localFileHeader = $this->readLocalFileHeader($fp);
                    $localFileHeader['extraLen'] = 0;
                    $localFileHeader['extra'] = '';
                    hash_update($ctx, $this->packLocalFileHeader($localFileHeader));
                    hash_update($ctx, $this->readBytes($fp, $localFileHeader['compressedSize']));
                } elseif ($signature === self::CENTRAL_DIRECTORY_STRUCTURE_HEADER) {
                    $cds = $this->readCentralDirectoryStructure($fp);
                    $cds['extraLen'] = 0;
                    $cds['extra'] = '';
                    // The local header offset shifts with the size of preceding extra fields.
                    $cds['offset'] = 0;
                    hash_update($ctx, $this->packCentralDirectoryStructure($cds));
                } elseif ($signature === self::CENTRAL_DIRECTORY_STRUCTURE_END) {
                    $cdsEnd = $this->readEndOfCentralDirectoryHeader($fp);
                    // Both depend on the extra fields that were stripped above.
                    $cdsEnd['size'] = 0;
                    $cdsEnd['offset'] = 0;
                    hash_update($ctx, $this->packEndOfCentralDirectoryHeader($cdsEnd));

                    // The end record terminates the loop explicitly: feof()
                    // only reports EOF after a read has hit it, so it cannot
                    // be used as the loop condition after an exact-length read.
                    $trailing = fread($fp, 1);
                    if ($trailing !== false && $trailing !== '') {
                        throw new RuntimeException('unexpected data after end of central directory');
                    }
                    $finished = true;
                } else {
                    throw new RuntimeException(sprintf('unexpected signature 0x%08x', $signature));
                }
            }
        } finally {
            fclose($fp);
        }

        return hash_final($ctx);
    }

    /**
     * Read a local file header (without its signature) including file name
     * and extra field.
     *
     * @param resource $fp stream positioned right after the signature
     * @return array header fields plus 'filename' and 'extra'
     * @throws RuntimeException when the stream ends early
     */
    public function readLocalFileHeader($fp)
    {
        $structure = $this->unpack($fp, self::LOCAL_FILE_HEADER_FORMAT);
        $structure['filename'] = $this->readBytes($fp, $structure['filenameLen']);
        $structure['extra'] = $this->readBytes($fp, $structure['extraLen']);
        return $structure;
    }

    /**
     * Serialise a local file header (without signature) followed by its file
     * name and extra field.
     *
     * @param array $header fields as returned by readLocalFileHeader()
     * @return string
     */
    private function packLocalFileHeader(array $header)
    {
        return $this->pack(self::LOCAL_FILE_HEADER_FORMAT, $header)
            . $header['filename']
            . $header['extra'];
    }

    /**
     * Read a central directory entry (without its signature) including file
     * name, extra field and comment.
     *
     * @param resource $fp stream positioned right after the signature
     * @return array entry fields plus 'filename', 'extra' and 'comment'
     * @throws RuntimeException when the stream ends early
     */
    private function readCentralDirectoryStructure($fp)
    {
        $structure = $this->unpack($fp, self::CENTRAL_DIRECTORY_STRUCTURE_FORMAT);
        $structure['filename'] = $this->readBytes($fp, $structure['filenameLen']);
        $structure['extra'] = $this->readBytes($fp, $structure['extraLen']);
        $structure['comment'] = $this->readBytes($fp, $structure['commentLen']);
        return $structure;
    }

    /**
     * Serialise a central directory entry (without signature) followed by its
     * file name, extra field and comment.
     *
     * @param array $header fields as returned by readCentralDirectoryStructure()
     * @return string
     */
    private function packCentralDirectoryStructure(array $header)
    {
        return $this->pack(self::CENTRAL_DIRECTORY_STRUCTURE_FORMAT, $header)
            . $header['filename']
            . $header['extra']
            . $header['comment'];
    }

    /**
     * Read the end-of-central-directory record (without its signature)
     * including the archive comment.
     *
     * @param resource $fp stream positioned right after the signature
     * @return array record fields plus 'comment'
     * @throws RuntimeException when the stream ends early
     */
    private function readEndOfCentralDirectoryHeader($fp)
    {
        $structure = $this->unpack($fp, self::END_OF_CENTRAL_DIRECTORY_STRUCTURE_FORMAT);
        $structure['comment'] = $this->readBytes($fp, $structure['commentLen']);
        return $structure;
    }

    /**
     * Serialise the end-of-central-directory record (without signature)
     * followed by the archive comment.
     *
     * @param array $header fields as returned by readEndOfCentralDirectoryHeader()
     * @return string
     */
    private function packEndOfCentralDirectoryHeader(array $header)
    {
        return $this->pack(self::END_OF_CENTRAL_DIRECTORY_STRUCTURE_FORMAT, $header)
            . $header['comment'];
    }

    /**
     * Read exactly $length bytes from the stream.
     *
     * @param resource $fp
     * @param int $length number of bytes wanted; values <= 0 yield ''
     * @return string
     * @throws RuntimeException when the stream ends before $length bytes were read
     */
    private function readBytes($fp, $length)
    {
        $data = '';
        $remaining = $length;
        // fread() may return fewer bytes than requested, so keep reading
        // until the request is satisfied or nothing more arrives.
        while ($remaining > 0) {
            $chunk = fread($fp, $remaining);
            if ($chunk === false || $chunk === '') {
                throw new RuntimeException(sprintf(
                    'format error: unexpected end of file (wanted %d bytes, got %d)',
                    $length,
                    strlen($data)
                ));
            }
            $data .= $chunk;
            $remaining -= strlen($chunk);
        }
        return $data;
    }

    /**
     * Read as many bytes as $format describes and decode them with unpack().
     *
     * @param resource $fp
     * @param string $format '/'-separated "<code><name>" list; codes v, n (2 bytes) and V (4 bytes)
     * @return array decoded values keyed by field name
     * @throws RuntimeException when the stream ends early
     */
    private function unpack($fp, $format)
    {
        static $sizes = array('v' => 2, 'V' => 4, 'n' => 2);

        $expectedLen = 0;
        foreach (explode('/', $format) as $entry) {
            $expectedLen += $sizes[$entry[0]];
        }

        return unpack($format, $this->readBytes($fp, $expectedLen));
    }

    /**
     * Inverse of unpack(): encode the named fields of $header in the order
     * and with the codes given by $format.
     *
     * @param string $format '/'-separated "<code><name>" list
     * @param array $header values keyed by field name
     * @return string
     */
    private function pack($format, array $header)
    {
        $formatStr = '';
        $args = array();
        foreach (explode('/', $format) as $entry) {
            $formatStr .= $entry[0];
            $args[] = $header[substr($entry, 1)];
        }
        array_unshift($args, $formatStr); // first parameter: format
        return call_user_func_array('pack', $args);
    }
}
// Print "<file>: <hash>" for every *.zip in the current working directory.
$zipHasher = new ZipHasher();
$zipFiles = glob('*.zip');
// glob() returns false on error; iterate over nothing in that case instead
// of handing a boolean to foreach.
foreach ($zipFiles === false ? array() : $zipFiles as $filename) {
    printf("%s: %s\n", $filename, $zipHasher->hash($filename));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment