Last active
March 28, 2022 16:03
-
-
Save erikyo/c9365b0bc49d599f1be14b2f7feeed91 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Debug settings: show every notice and warning while inspecting files.
ini_set('display_errors', '1');
ini_set('display_startup_errors', '1');
error_reporting(E_ALL);

// Number of bytes read from the start of a VP8 / VP8L / VP8X chunk
// (FourCC included) before handing it to the matching header decoder.
const MINIMUM_CHUNK_HEADER_LENGTH = 18;

// Bit masks tested against the flag byte of a VP8X chunk header.
const VP8X_ALPHA = 16;
const VP8X_ANIM = 2;
const VP8X_EXIF = 8;
const VP8X_ICC = 32;
const VP8X_XMP = 4;
// RIFF
// credits https://doc.wikimedia.org/mediawiki-core/1.27.3/php/classRiffExtractor.html
/**
 * Decode the header of a lossy ('VP8 ') WebP chunk.
 *
 * Layout of the bytes inspected here:
 *   0-3   'VP8 '
 *   4-7   VP8 stream size
 *   8-10  frame tag
 *   11-13 sync code 0x9D 0x01 0x2A
 *   14-17 width and height (two little-endian 16-bit values)
 *
 * @param string $header Raw bytes starting at the chunk FourCC (18 bytes needed).
 * @return array Empty array when the header is truncated or the sync code does
 *               not match; otherwise 'compression', 'width' and 'height'.
 */
function decodeLossyChunkHeader( $header ) {
    // Everything up to and including byte 17 is read below.
    $requiredLength = 18;
    if ( ! is_string( $header ) || strlen( $header ) < $requiredLength ) {
        $available = is_string( $header ) ? strlen( $header ) : 0;
        print_r( 'WebP decodeLossyChunkHeader header too short: ' . $available . " bytes\n" );
        return [];
    }

    $syncCode = substr( $header, 11, 3 );
    if ( $syncCode !== "\x9D\x01\x2A" ) {
        print_r( 'WebP decodeLossyChunkHeader Invalid sync code: '. bin2hex( $syncCode ) . "\n" );
        return [];
    }

    $imageSize = unpack( 'v2', substr( $header, 14, 4 ) );

    // Dimensions are 14 bit wide; the 2 most significant bits are scaling
    // parameters and are masked out.
    return [
        'compression' => 'lossy',
        'width' => $imageSize[1] & 0x3FFF,
        'height' => $imageSize[2] & 0x3FFF
    ];
}
/**
 * Decode the header of a lossless ('VP8L') WebP chunk.
 *
 * Layout of the bytes inspected here:
 *   0-3  'VP8L'
 *   4-7  chunk stream size
 *   8    signature byte 0x2F
 *   9-12 little-endian bit field: bits 0-13 are width-1, bits 14-27 are height-1
 *
 * @param string $header Raw bytes starting at the chunk FourCC (13 bytes needed).
 * @return array Empty array when the header is truncated or the signature does
 *               not match; otherwise 'compression', 'width' and 'height'.
 */
function decodeLosslessChunkHeader( $header ) {
    // Bytes 0..12 are read below.
    $requiredLength = 13;
    if ( ! is_string( $header ) || strlen( $header ) < $requiredLength ) {
        print_r( 'Invalid signature: ' . "\n" );
        return [];
    }

    if ( $header[8] !== "\x2F" ) {
        print_r( 'Invalid signature: ' . bin2hex( $header[8] ) . "\n" );
        return [];
    }

    // Read the four size bytes as one little-endian integer so both 14-bit
    // fields can be extracted with plain shifts and masks. The previous
    // byte-wise version masked the last byte with 0x03 and therefore dropped
    // the two highest height bits (heights above 4096 came out wrong).
    $bits = unpack( 'V', substr( $header, 9, 4 ) )[1];

    return [
        'compression' => 'lossless',
        'width' => ( $bits & 0x3FFF ) + 1,
        'height' => ( ( $bits >> 14 ) & 0x3FFF ) + 1
    ];
}
/**
 * Decode the header of an extended ('VP8X') WebP chunk.
 *
 * Layout of the bytes inspected here:
 *   0-3   'VP8X'
 *   4-7   chunk length
 *   8     feature flags (tested against the VP8X_* masks)
 *   9-11  remaining flag bytes, not inspected
 *   12-14 width-1  (24-bit little-endian)
 *   15-17 height-1 (24-bit little-endian)
 *
 * @param string $header Raw bytes starting at the chunk FourCC (18 bytes needed).
 * @return array Empty array when the header is truncated; otherwise the
 *               feature booleans plus 'width' and 'height'.
 */
function decodeExtendedChunkHeader( $header ) {
    // Bytes 0..17 are read below; refuse anything shorter instead of letting
    // unpack() fail and leak warnings / null dimensions.
    if ( ! is_string( $header ) || strlen( $header ) < 18 ) {
        print_r( 'WebP decodeExtendedChunkHeader header too short' . "\n" );
        return [];
    }

    // The flag byte is a bit field, so read it unsigned.
    $flags = ord( $header[8] );

    // 24-bit values: pad with a zero byte so they can be unpacked as uint32.
    $width = unpack( 'V', substr( $header, 12, 3 ) . "\x00" )[1];
    $height = unpack( 'V', substr( $header, 15, 3 ) . "\x00" )[1];

    return [
        'compression' => 'unknown',
        'animated' => ( $flags & VP8X_ANIM ) === VP8X_ANIM,
        'transparency' => ( $flags & VP8X_ALPHA ) === VP8X_ALPHA,
        'EXIF' => ( $flags & VP8X_EXIF ) === VP8X_EXIF,
        'ICC' => ( $flags & VP8X_ICC ) === VP8X_ICC,
        'XMP' => ( $flags & VP8X_XMP ) === VP8X_XMP,
        'width' => ( $width & 0xFFFFFF ) + 1,
        'height' => ( $height & 0xFFFFFF ) + 1
    ];
}
/**
 * Decode an 'EXIF' WebP chunk: the RIFF chunk header, the TIFF header and the
 * entries of the 0th IFD.
 *
 * The TIFF header is searched for in the first bytes of the payload because
 * some writers put extra bytes (for example "Exif\0\0") in front of it. All
 * multi-byte TIFF fields are decoded using the byte order the header
 * announces ("II" little-endian, "MM" big-endian).
 *
 * Returned keys:
 *   'type', 'size'    RIFF chunk FourCC and payload size
 *   'byte_order'      'II', 'MM' or '' when no TIFF header was found
 *   'fixed42'         raw hex of the 2 magic-number bytes
 *   'offset'          raw hex of the 4 bytes holding the 0th IFD offset
 *   'idf_count'       number of entries in the 0th IFD (int)
 *   'orientation'     4-digit hex of tag 0x0112, or ''
 *   'dataset'         one item per IFD entry: 'tag' and 'type' as 4-digit hex,
 *                     'value' as raw bytes, 'raw_value_data' with the entry
 *                     hex dump, the decoded value/offset field and the count
 *
 * @param string $img_metadata The whole chunk, starting at its FourCC.
 * @return array Empty array when not even the 8-byte chunk header is present.
 */
function decodeExifChunkHeader($img_metadata) {
    if ( ! is_string( $img_metadata ) || strlen( $img_metadata ) < 8 ) {
        return [];
    }

    // RIFF chunk header: FourCC followed by a little-endian payload size.
    $metadata = array_merge(
        unpack( 'A4type/Vsize', substr( $img_metadata, 0, 8 ) ),
        array(
            'byte_order' => '',
            'fixed42' => '',
            'offset' => '',
            'idf_count' => 0,
            'orientation' => '',
        )
    );

    // Locate the TIFF header ("II*\0" or "MM\0*"). It normally starts right
    // after the chunk header (position 8) but may be shifted by a prefix.
    $tiffStart = -1;
    $lastProbe = min( 20, strlen( $img_metadata ) - 8 );
    for ( $pos = 8; $pos <= $lastProbe; $pos++ ) {
        $magic = substr( $img_metadata, $pos, 4 );
        if ( $magic === "II\x2A\x00" || $magic === "MM\x00\x2A" ) {
            $tiffStart = $pos;
            break;
        }
    }
    if ( $tiffStart < 0 ) {
        return $metadata;
    }

    // Pick the unpack codes matching the announced byte order.
    $littleEndian = ( $img_metadata[ $tiffStart ] === 'I' );
    $u16 = $littleEndian ? 'v' : 'n';
    $u32 = $littleEndian ? 'V' : 'N';

    $tiffHeader = substr( $img_metadata, $tiffStart, 8 );
    $metadata['byte_order'] = substr( $tiffHeader, 0, 2 );
    $metadata['fixed42'] = bin2hex( substr( $tiffHeader, 2, 2 ) );
    $metadata['offset'] = bin2hex( substr( $tiffHeader, 4, 4 ) );

    // The IFD offset is relative to the start of the TIFF header.
    $ifdStart = $tiffStart + unpack( $u32, substr( $tiffHeader, 4, 4 ) )[1];
    $countBytes = (string) substr( $img_metadata, $ifdStart, 2 );
    if ( strlen( $countBytes ) < 2 ) {
        return $metadata;
    }
    $metadata['idf_count'] = unpack( $u16, $countBytes )[1];

    // Size in bytes of a single value for every TIFF field type.
    $typeSizes = array(
        1 => 1, 2 => 1, 3 => 2, 4 => 4, 5 => 8, 6 => 1,
        7 => 1, 8 => 2, 9 => 4, 10 => 8, 11 => 4, 12 => 8,
    );

    for ( $i = 0; $i < $metadata['idf_count']; $i++ ) {
        // Each entry is 12 bytes: tag(2) type(2) count(4) value-or-offset(4).
        $entry = (string) substr( $img_metadata, $ifdStart + 2 + ( 12 * $i ), 12 );
        if ( strlen( $entry ) < 12 ) {
            // Truncated IFD: keep what was decoded so far.
            break;
        }

        $fields = unpack( $u16 . 'tag/' . $u16 . 'type/' . $u32 . 'count', $entry );
        $valueField = substr( $entry, 8, 4 );
        $valueOffset = unpack( $u32, $valueField )[1];

        // "count" is the number of values, not bytes.
        $unitSize = isset( $typeSizes[ $fields['type'] ] ) ? $typeSizes[ $fields['type'] ] : 1;
        $byteLength = $fields['count'] * $unitSize;

        if ( $byteLength <= 4 ) {
            // Small values are stored inline, left-justified in the value field.
            $value = substr( $valueField, 0, $byteLength );
        } else {
            // Larger values live elsewhere, addressed from the TIFF header start.
            $value = (string) substr( $img_metadata, $tiffStart + $valueOffset, $byteLength );
        }

        if ( $fields['tag'] === 0x0112 ) {
            // Orientation is a single SHORT stored inline.
            $metadata['orientation'] = sprintf( '%04x', unpack( $u16, substr( $valueField, 0, 2 ) )[1] );
        }

        $metadata['dataset'][ $i ] = array(
            'tag' => sprintf( '%04x', $fields['tag'] ),
            'type' => sprintf( '%04x', $fields['type'] ),
            'value' => $value,
            'raw_value_data' => array(
                'hex' => bin2hex( $entry ),
                'offset' => $valueOffset,
                'count' => $fields['count'],
            ),
        );
    }

    return $metadata;
}
/**
 * Decode an 'ICCP' WebP chunk (embedded ICC colour profile).
 *
 * References:
 *   https://www.color.org/icc_specs2.xalter
 *   https://www.color.org/specification/ICCSpecRevision_25-02-10_dictType.pdf
 *   https://www.color.org/icc32.pdf (definitions near page 80)
 *
 * Returned keys:
 *   'parsed'       RIFF chunk header, the raw 128-byte profile header and
 *                  'raw-body' (everything from chunk offset 128 on, kept as
 *                  the original output did)
 *   'body-N'       one item per tag table entry (N starts at 1) with the tag
 *                  signature, the 4-byte type signature and the remaining data
 *   'icc'          status text
 *   'icc-input',
 *   'icc-output'   only when the 'acsp' signature is present
 *
 * @param string $img_metadata The whole chunk, starting at its FourCC.
 * @return array
 */
function decodeIccpChunkHeader($img_metadata) {
    if ( ! is_string( $img_metadata ) || strlen( $img_metadata ) < 8 ) {
        return array( 'icc' => 'ICC profile INVALID (chunk too short)' );
    }

    $metadata = array();

    // RIFF chunk header: FourCC followed by a little-endian payload size.
    $metadata['parsed']['header'] = unpack( 'A4type/Vsize', substr( $img_metadata, 0, 8 ) );

    // The profile starts right after the 8-byte chunk header.
    $profileStart = 8;
    $raw_header = (string) substr( $img_metadata, $profileStart, 128 );
    $metadata['parsed']['raw-header'] = $raw_header;
    $metadata['parsed']['raw-body'] = substr( $img_metadata, 128 );

    // The tag table follows the 128-byte profile header: a big-endian uint32
    // tag count, then 12 bytes (signature, offset, length) per tag.
    $tagTableStart = $profileStart + 128;
    $tagCount = 0;
    $countBytes = (string) substr( $img_metadata, $tagTableStart, 4 );
    if ( strlen( $countBytes ) === 4 ) {
        $tagCount = unpack( 'N', $countBytes )[1];
    }
    // Never read more entries than the chunk can actually hold.
    $maxEntries = (int) floor( max( 0, strlen( $img_metadata ) - $tagTableStart - 4 ) / 12 );
    $tagCount = min( $tagCount, $maxEntries );

    for ( $i = 1; $i <= $tagCount; $i++ ) {
        $entry = unpack(
            'Z4tag/Noffset/Nlength',
            substr( $img_metadata, $tagTableStart + 4 + 12 * ( $i - 1 ), 12 )
        );
        // Tag offsets are relative to the start of the profile.
        $data = (string) substr( $img_metadata, $entry['offset'] + $profileStart, $entry['length'] );

        $metadata["body-$i"] = array(
            "tag" => substr( $entry['tag'], 0, 4 ),
            "type" => (string) substr( $data, 0, 4 ),
            "data" => (string) substr( $data, 4 )
        );
    }

    $metadata['icc'] = "ICC profile present";

    // A valid profile carries the 'acsp' signature at header offset 36.
    if ( substr( $raw_header, 36, 4 ) != 'acsp' ) {
        // Report the bytes found where the signature was expected.
        $metadata['icc'] = "ICC profile INVALID (no acsp flag) " . substr( $raw_header, 36, 4 );
    } else {
        $input = substr( $raw_header, 16, 4 );
        $output = substr( $raw_header, 20, 4 );
        $metadata['icc-input'] = 'ICC profile Input: ' . $input;
        $metadata['icc-output'] = 'ICC profile Output: ' . $output;
        // Ignore Color profiles for conversion to other color-spaces e.g. CMYK/Lab
        if ( $input != 'RGB ' || $output != 'XYZ ' ) {
            $metadata['icc'] = 'ICC profile ignored';
        }
    }

    return $metadata;
}
/**
 * Decode an 'XMP ' WebP chunk.
 *
 * References:
 *   https://en.wikipedia.org/wiki/Extensible_Metadata_Platform
 *   https://web.archive.org/web/20180919181934/http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf
 *   https://github.com/jeroendesloovere/xmp-metadata-extractor/blob/master/src/XmpMetadataExtractor.php
 *
 * @param string $img_metadata The whole chunk, starting at its FourCC.
 * @return array 'XMP-parsed' holds the chunk FourCC ('type') and payload size
 *               ('size'); 'XMP-raw' holds the payload escaped for HTML output.
 */
function decodeXmpChunkHeader($img_metadata) {
    if ( ! is_string( $img_metadata ) || strlen( $img_metadata ) < 8 ) {
        return array(
            'XMP-parsed' => array( 'type' => '', 'size' => 0 ),
            'XMP-raw' => '',
        );
    }

    $metadata = array();

    // RIFF chunk header: FourCC followed by a little-endian payload size.
    $metadata['XMP-parsed'] = unpack( 'A4type/Vsize', substr( $img_metadata, 0, 8 ) );

    // The payload is treated as UTF-8 already: running it through utf8_encode()
    // would re-encode every multi-byte character a second time. ENT_SUBSTITUTE
    // keeps the output non-empty if the packet contains invalid sequences.
    $metadata['XMP-raw'] = htmlspecialchars(
        (string) substr( $img_metadata, 8 ),
        ENT_QUOTES | ENT_SUBSTITUTE,
        'UTF-8'
    );

    return $metadata;
}
/**
 * Walk the RIFF chunks of an open stream, starting at the current position.
 *
 * For every chunk the FourCC, the position of the chunk header and the
 * declared payload size are recorded; the payload itself is skipped.
 *
 * @param resource   $file      Stream positioned at the first chunk header.
 * @param int|string $fileSize  RIFF size, either already decoded (int) or as
 *                              the raw 4 little-endian bytes read from the file.
 * @param string     $fourCC    Form type stored as-is in the result.
 * @param int        $maxChunks Stop after this many chunks; negative = no limit.
 * @return array 'fileSize' (int), 'fourCC', and 'chunks' (list of arrays with
 *               'fourCC', 'start' and 'size').
 */
function find_chunks( $file, $fileSize, $fourCC, $maxChunks = -1 ) {
    // A decoded integer must not be pushed through unpack() again: it would be
    // cast to its decimal string and decoded as if those digits were bytes.
    if ( is_string( $fileSize ) && strlen( $fileSize ) >= 4 ) {
        $decodedSize = unpack( 'V', $fileSize )[1];
    } else {
        $decodedSize = (int) $fileSize;
    }

    $info = [
        'fileSize' => $decodedSize,
        'fourCC' => $fourCC,
        'chunks' => [],
    ];

    $numberOfChunks = 0;

    while ( ! feof( $file ) && ! ( $numberOfChunks >= $maxChunks && $maxChunks >= 0 ) ) {
        $chunkStart = ftell( $file );

        $chunkFourCC = fread( $file, 4 );
        if ( $chunkFourCC === false || strlen( $chunkFourCC ) !== 4 ) {
            return $info;
        }

        $chunkSize = fread( $file, 4 );
        if ( $chunkSize === false || strlen( $chunkSize ) !== 4 ) {
            return $info;
        }
        $intChunkSize = unpack( 'V', $chunkSize )[1];

        $info['chunks'][] = [
            'fourCC' => $chunkFourCC,
            'start' => $chunkStart,
            'size' => $intChunkSize
        ];
        $numberOfChunks++;

        // Odd-sized payloads are followed by one padding byte.
        $padding = $intChunkSize % 2;

        // Skip the payload; stop if the stream cannot be repositioned, since
        // the next read would otherwise interpret payload bytes as a header.
        if ( fseek( $file, $intChunkSize + $padding, SEEK_CUR ) !== 0 ) {
            break;
        }
    }

    return $info;
}
/**
 * Read $length bytes from the stream and return them as a raw string.
 *
 * @param resource $handle Open stream.
 * @param int      $length Number of bytes to read.
 * @return string
 */
function readXChar($handle, $length){
    // 'a*' hands the bytes back unchanged as one string field.
    $rawStringFormat = 'a*';
    $text = readUnpack($handle, $rawStringFormat, $length);
    return $text;
}
/**
 * Read one byte from the stream and return the result of unpacking it with
 * format 'h' (hex string, low nibble first, no repeat count).
 *
 * @param resource $handle Open stream.
 * @return string
 */
function readHex($handle){
    $bytesToRead = 1;
    $hexDigit = readUnpack($handle, 'h', $bytesToRead);
    return $hexDigit;
}
/**
 * Read 4 bytes from the stream as an unsigned little-endian 32-bit integer.
 *
 * @param resource $handle Open stream.
 * @return int
 */
function read32($handle){
    $byteCount = 4;
    $number = readUnpack($handle, 'V', $byteCount);
    return $number;
}
/**
 * Read 3 bytes from the stream as an unsigned little-endian 24-bit integer.
 *
 * @param resource $handle Open stream.
 * @return int 0 when fewer than 3 bytes could be read.
 */
function read24($handle){
    $bytes = fread($handle, 3);
    if ($bytes === false || strlen($bytes) < 3) {
        return 0;
    }
    // unpack('V') needs 4 bytes, so the zero padding has to be appended to the
    // input, not to the unpacked result.
    return unpack('V', $bytes . "\x00")[1];
}
/**
 * Read 2 bytes from the stream as an unsigned little-endian 16-bit integer.
 *
 * @param resource $handle Open stream.
 * @return int
 */
function read16($handle){
    // 'v' is the 16-bit little-endian code; 'V' would require 4 bytes and
    // fail on the 2 bytes read here.
    return readUnpack($handle, 'v', 2);
}
/**
 * Decode the first 4 bytes of a binary string as an unsigned little-endian
 * 32-bit integer.
 *
 * Despite its name, $handle is passed straight to unpack() and therefore has
 * to be a byte string, not a stream resource.
 *
 * @param string $handle Binary string of at least 4 bytes.
 * @return int
 */
function readUInt32( $handle ) {
    $decoded = unpack( 'V', $handle );
    return $decoded[1];
}
/**
 * Read 4 bytes from the stream as an unsigned little-endian 32-bit integer.
 *
 * @param resource $handle Open stream.
 * @return int 0 when fewer than 4 bytes could be read.
 */
function readInt($handle){
    $bytes = fread($handle, 4);
    if ($bytes === false || strlen($bytes) < 4) {
        return 0;
    }
    // RIFF integers are little-endian; 'I' would follow the byte order of the
    // machine running the script.
    $data = unpack('V', $bytes);
    return intval($data[1]);
}
/**
 * Read $length bytes from the stream, unpack them with the given format and
 * return the last unpacked field.
 *
 * @param resource $handle Open stream.
 * @param string   $type   Format string for unpack().
 * @param int      $length Number of bytes to read.
 * @return mixed
 */
function readUnpack($handle, $type, $length){
    $bytes = fread($handle, $length);
    $fields = unpack($type, $bytes);
    $lastField = array_pop($fields);
    return $lastField;
}
/**
 * Combine the first four bytes of a string into a big-endian integer.
 *
 * @param string $s Binary string.
 * @return int
 */
function _fourbytes2int($s) {
    $total = 0;
    // Byte index => bit position of that byte in the result.
    foreach (array(0 => 24, 1 => 16, 2 => 8, 3 => 0) as $index => $shift) {
        $total += ord($s[$index]) << $shift;
    }
    return $total;
}
/**
 * Combine the first two bytes of a string into a big-endian integer
 * (equivalent to _get_ushort).
 *
 * @param string $s Binary string.
 * @return int
 */
function _twobytes2int($s) {
    $highByte = ord(substr($s, 0, 1));
    $lowByte = ord(substr($s, 1, 1));
    return ($highByte << 8) + $lowByte;
}
/**
 * Extract the bit field [$start, $end) from an integer.
 *
 * @param int $value Source integer.
 * @param int $start Index of the lowest bit to keep.
 * @param int $end   Index one past the highest bit to keep.
 * @return int The selected bits, shifted down to bit 0.
 */
function readBits($value, $start, $end) {
    $width = $end - $start;
    $shifted = $value >> $start;
    $mask = ( 1 << $width ) - 1;
    return $shifted & $mask;
}
/**
 * Util function to humanize a file size using 1024-based units.
 *
 * The value is divided by 1024 until it drops below 1024 or the largest
 * suffix is reached. Using division instead of a logarithm avoids floating
 * point errors at exact powers of 1024 and keeps size 0 well-defined.
 *
 * @param int|float $size      Size in bytes.
 * @param int       $precision Number of decimals to round to.
 * @return string Rounded value followed by '', 'K', 'M', 'G' or 'T';
 *                '0' for zero, negative or non-numeric-like sizes.
 */
function formatBytes($size, $precision = 2) {
    $suffixes = array('', 'K', 'M', 'G', 'T');

    $value = (float) $size;
    // Written as a negated comparison so NAN is rejected as well.
    if ( ! ( $value > 0 ) ) {
        return '0';
    }

    $index = 0;
    $lastIndex = count($suffixes) - 1;
    while ($value >= 1024 && $index < $lastIndex) {
        $value /= 1024;
        $index++;
    }

    return round($value, $precision) . $suffixes[$index];
}
/**
 * Inspect a file and collect the metadata of its WebP (RIFF) chunks.
 *
 * When the file does not start with 'RIFF' a human-readable message is
 * returned instead, naming the detected format (JPEG/JFIF, GIF, PNG) where
 * possible.
 *
 * @param string $filename Path of the file to inspect.
 * @return array|string Metadata array for RIFF files, message string otherwise
 *                      (also when the file cannot be opened).
 */
function getImageRiff($filename) {
    $fh = fopen($filename, 'rb');
    if ( $fh === false ) {
        return htmlspecialchars( (string) $filename ) . " could not be opened for reading";
    }

    // The handle is released on every return path.
    try {
        $meta = array();
        $meta['RIFF'] = readXChar($fh, 4); // read 32 bits - 4 char | riff header

        if ($meta['RIFF'] !== 'RIFF') {
            $meta['header'] = readXChar($fh, 32);
            $meta['jfif'] = substr( $meta['header'], 2, 4 );

            if ($meta['jfif'] === 'JFIF') {
                return htmlentities($meta['jfif']) . " isn't a webp riff format... check the header probably it's a jpg since it contains the jfif signature (TLDR HH - https://dev.exiv2.org/projects/exiv2/wiki/The_Metadata_in_JPEG_files) -> " . $meta['RIFF'].$meta['header'];
            }

            if ($meta['RIFF'] === 'GIF8' ) {
                return $meta['RIFF'] . " isn't a webp riff format... check the header probably it's a gif since it contains the GIF file signature -> " . $meta['RIFF'].$meta['header'];
            }

            if (substr( $meta['header'], 8, 4 ) === 'IHDR') {
                $meta['jfif'] = substr( $meta['header'], 8, 4 );
                return $meta['jfif'] . " isn't a webp riff format... check the header probably it's a png since it contains the png file signature (TLDR IHDR - https://www.w3.org/TR/PNG-Structure.html) The first eight bytes of a PNG file always contain the following (decimal) values:".
                    "<br/>(137 80 78 71 13 10 26 10)".
                    "<br/>This signature indicates that the remainder of the file contains a single PNG image, consisting of a series of chunks beginning with an IHDR chunk and ending with an IEND chunk. -> " . $meta['RIFF'].$meta['header'];
            }

            return htmlspecialchars($meta['RIFF']).substr( $meta['header'], 0, 4 ) . " unknown signature -> " . $meta['header'];
        }

        // RIFF size field: 4 little-endian bytes. The raw bytes are kept
        // because find_chunks() decodes its $fileSize argument itself.
        $sizeBytes = fread($fh, 4);
        if ( $sizeBytes === false || strlen($sizeBytes) !== 4 ) {
            $sizeBytes = "\x00\x00\x00\x00";
        }
        $meta['Filesize'] = unpack('V', $sizeBytes)[1]; // read 32 bits (uint32) | the whole file size
        $meta['Webp'] = readXChar($fh, 4); // read 32 bits - 4 char | webp extension

        $raw = find_chunks( $fh, $sizeBytes, $meta['Webp'], -1 );

        $meta['readeable-filesize'] = formatBytes($meta['Filesize']);
        $meta['chunks-flat'] = array_column($raw['chunks'], 'fourCC');

        foreach ( $raw['chunks'] as $k => $chunk ) {
            $fourCC = $chunk['fourCC'];
            $chunkKey = $k . '_' . $fourCC;

            if ( in_array( $fourCC, array( 'VP8 ', 'VP8L', 'VP8X' ), true ) ) {
                // Image data chunks: only the fixed-size header is needed.
                $chunkHeader = file_get_contents( $filename, false, null, $chunk['start'], MINIMUM_CHUNK_HEADER_LENGTH );
                if ( $fourCC === 'VP8 ' ) {
                    $meta[ $chunkKey ] = decodeLossyChunkHeader( $chunkHeader );
                } else if ( $fourCC === 'VP8L' ) {
                    $meta[ $chunkKey ] = decodeLosslessChunkHeader( $chunkHeader );
                } else {
                    $meta[ $chunkKey ] = decodeExtendedChunkHeader( $chunkHeader );
                }
            } else if ( in_array( $fourCC, array( 'ICCP', 'ANIM', 'XMP ', 'EXIF' ), true ) ) { //'ANIF'
                // Default entry; replaced below for the chunk types that have a decoder.
                $meta[ $chunkKey ] = array( $fourCC => 'start: '. $chunk['start'] . ' length:' . $chunk['size'] );
                // Chunk header (8 bytes) plus the whole payload.
                $img_metadata = file_get_contents( $filename, false, null, $chunk['start'], $chunk['size'] + 8 );
                if ( $fourCC === 'EXIF' ) {
                    $meta[ $chunkKey ] = decodeExifChunkHeader($img_metadata);
                } else if ( $fourCC === 'ICCP' ) {
                    $meta[ $chunkKey ] = decodeIccpChunkHeader($img_metadata);
                } else if ( $fourCC === 'XMP ' ) {
                    $meta[ $chunkKey ] = decodeXmpChunkHeader($img_metadata);
                }
            } else if ( $fourCC === 'ANMF' ) {
                // Animation frames are only listed, not decoded.
                $meta['frames'][ $chunkKey ] = "frame" . $k;
            } else {
                $meta[ $chunkKey ]['missed'][] = '"'. $fourCC . '"';
            }
        }

        return $meta;
    } finally {
        fclose($fh);
    }
}
// Demo driver: inspect a fixed list of sample images that are expected to sit
// next to this script and print the collected metadata below each image.
$upload_dir = __DIR__;
$images = array(
    "icc-lossy.webp",
    "icc-lossless.webp",
    "example.webp",
    "arrow.webp",
    "nyan.webp",
    "WebP-lossless.webp",
    "WebP-lossy.webp"
);

echo '<pre>';
foreach ($images as $id => $image) {
    $image_path = $upload_dir .'/'.$image;
    $image_url = "./" . $image;

    // A missing sample must not abort the whole page.
    if ( is_file( $image_path ) && is_readable( $image_path ) ) {
        $image_data = getImageRiff( $image_path );
    } else {
        $image_data = $image . ' not found or not readable';
    }

    printf( '<div><img loading="lazy" id="%s" src="%s" /><p> %s (%s) </p> %s </div>', $id, $image_url, $image, $image_url, print_r($image_data, true) );
}
echo '</pre>';
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment