Skip to content

Instantly share code, notes, and snippets.

@artoodetoo
Last active December 15, 2021 13:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save artoodetoo/5e73ce8e9e81fdae36b7258a1d3f7303 to your computer and use it in GitHub Desktop.
Save artoodetoo/5e73ce8e9e81fdae36b7258a1d3f7303 to your computer and use it in GitHub Desktop.
<?php
/**
 * Walks the sections (marker segments) of a JPEG byte string in file order.
 *
 * Each iteration yields an array with three entries:
 *  - `name`:   one of the class constants below;
 *  - `length`: number of bytes the section occupies, marker included;
 *  - `word`:   the raw two-byte marker as a big-endian integer.
 * The iteration key is the byte offset of the section within the content.
 */
class JpegSectionIterator implements \Iterator
{
    public const
        UNKNOWN = 'unknown',
        SOI = 'SOI', // Start Of Image
        SOF0 = 'SOF0', // Start Of Frame (baseline DCT)
        SOF1 = 'SOF1', // Start Of Frame (extended DCT)
        SOF2 = 'SOF2', // Start Of Frame (progressive DCT)
        DHT = 'DHT', // Define Huffman Table(s)
        DQT = 'DQT', // Define Quantization Table(s)
        DRI = 'DRI', // Define Restart Interval
        SOS = 'SOS', // Start Of Scan
        RSTn = 'RSTn', // Restart
        APPn = 'APPn', // Application-specific, e.g. an Exif JPEG file uses an APP1
        COM = 'COM', // Comment
        EOI = 'EOI' // End Of Image
    ;

    private string $content;
    private int $contentLength;
    private int $offset;

    /**
     * @param string $content Raw bytes of the JPEG file.
     *
     * @throws \InvalidArgumentException When the content is shorter than 4 bytes.
     */
    public function __construct(string $content)
    {
        $this->contentLength = strlen($content);
        if ($this->contentLength < 4) {
            throw new \InvalidArgumentException('Wrong JPEG');
        }
        $this->content = $content;
        $this->offset = 0;
    }

    /**
     * Describes the section at the current offset.
     *
     * @return array{name: string, length: int, word: int}
     */
    public function current(): array
    {
        return $this->detectCurrentSection();
    }

    /**
     * Byte offset of the current section.
     */
    public function key(): int
    {
        return $this->offset;
    }

    /**
     * Moves to the section that follows the current one.
     *
     * Kept returning the new offset for backward compatibility, although
     * \Iterator::next() is declared void; the attribute below silences the
     * PHP 8.1+ deprecation and is an ordinary comment on PHP 7.
     *
     * @return int The new offset.
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $section = $this->detectCurrentSection();
        $this->offset += $section['length'];

        return $this->offset;
    }

    public function rewind(): void
    {
        $this->offset = 0;
    }

    public function valid(): bool
    {
        return isset($this->content[$this->offset]);
    }

    /**
     * Raw bytes of the current section (marker, length field and payload).
     */
    public function currentSection(): string
    {
        $section = $this->detectCurrentSection();

        // substr() returns false on PHP 7 when the offset is past the end.
        return (string) substr($this->content, $this->offset, $section['length']);
    }

    /**
     * Identifies the section at the current offset and measures its size.
     *
     * Never reads past the end of the content: a truncated tail is reported
     * as an UNKNOWN section covering the remaining bytes, and a declared
     * length that overshoots the content is clamped.
     *
     * @return array{name: string, length: int, word: int}
     */
    private function detectCurrentSection(): array
    {
        $remaining = max(0, $this->contentLength - $this->offset);
        $word = $this->readWord($this->offset);

        if ($word === null) {
            // Fewer than two bytes left: there is no complete marker to read.
            // A lone byte is reported as the high byte of the word.
            $word = $remaining === 1 ? ord($this->content[$this->offset]) << 8 : 0;

            return ['name' => self::UNKNOWN, 'length' => $remaining, 'word' => $word];
        }

        $name = $this->sectionName($word);
        switch ($name) {
            // markers followed by payload length
            case self::SOF0:
            case self::SOF1:
            case self::SOF2:
            case self::DHT:
            case self::DQT:
            case self::APPn:
            case self::COM:
                $length = $this->segmentLength();
                break;
            case self::DRI:
                $length = 6;
                break;
            // marker followed by a header length, then the entropy-coded scan itself
            case self::SOS:
                $length = $this->scanLength();
                break;
            case self::UNKNOWN:
                // Markers in 0xFFC0..0xFFFE that this class does not name
                // (SOF3, DAC, DNL, JPGn, ...) still carry a length field;
                // treating them as bare markers would desynchronise the walk.
                $length = ($word >= 0xFFC0 && $word <= 0xFFFE) ? $this->segmentLength() : 2;
                break;
            // markers without payload
            default:
                $length = 2;
        }
        $length = min($length, $remaining);

        return ['name' => $name, 'length' => $length, 'word' => $word];
    }

    /**
     * Reads a big-endian 16-bit word, or null when fewer than two bytes
     * are available at the given position.
     */
    private function readWord(int $position): ?int
    {
        if ($position < 0 || $position + 2 > $this->contentLength) {
            return null;
        }

        return (ord($this->content[$position]) << 8) | ord($this->content[$position + 1]);
    }

    /**
     * Size of a length-prefixed segment at the current offset, marker included.
     * Falls back to "everything that is left" when the length field is cut off.
     */
    private function segmentLength(): int
    {
        $declared = $this->readWord($this->offset + 2);
        if ($declared === null) {
            return $this->contentLength - $this->offset;
        }

        return $declared + 2;
    }

    /**
     * Size of the SOS section at the current offset: marker, scan header and
     * the entropy-coded data up to (not including) the next real marker.
     */
    private function scanLength(): int
    {
        $untilEnd = $this->contentLength - $this->offset;
        $declared = $this->readWord($this->offset + 2);
        if ($declared === null) {
            return $untilEnd;
        }

        // The length field counts itself, so the scan data begins right here.
        $cursor = $this->offset + 2 + $declared;
        while ($cursor < $this->contentLength) {
            $cursor = strpos($this->content, "\xff", $cursor);
            if ($cursor === false) {
                return $untilEnd;
            }
            $candidate = $this->readWord($cursor);
            if ($candidate === null) {
                // Lone 0xFF as the very last byte: not a marker.
                return $untilEnd;
            }
            // Stuffed 0xFF00 and restart markers belong to the scan data.
            if ($candidate === 0xFF00 || ($candidate >= 0xFFD0 && $candidate <= 0xFFD7)) {
                $cursor += 2;
                continue;
            }
            // 0xFF followed by 0xFF is a fill byte; the marker, if any, starts later.
            if ($candidate === 0xFFFF) {
                $cursor += 1;
                continue;
            }

            return $cursor - $this->offset;
        }

        return $untilEnd;
    }

    /**
     * Maps a two-byte marker to one of the section name constants.
     */
    private function sectionName(int $word): string
    {
        switch ($word) {
            case 0xFFD8:
                return self::SOI;
            case 0xFFC0:
                return self::SOF0;
            case 0xFFC1:
                return self::SOF1;
            case 0xFFC2:
                return self::SOF2;
            case 0xFFC4:
                return self::DHT;
            case 0xFFDB:
                return self::DQT;
            case 0xFFDD:
                return self::DRI;
            case 0xFFDA:
                return self::SOS;
            case 0xFFFE:
                return self::COM;
            case 0xFFD9:
                return self::EOI;
        }
        // Explicit ranges: bit masks here used to swallow 0xFFF0..0xFFFF too.
        if ($word >= 0xFFD0 && $word <= 0xFFD7) {
            return self::RSTn;
        }
        if ($word >= 0xFFE0 && $word <= 0xFFEF) {
            return self::APPn;
        }

        return self::UNKNOWN;
    }
}
<?php
use JpegSectionIterator;
/**
 * Removes application-specific (APPn, e.g. Exif) and comment (COM) sections
 * from a JPEG file, rewriting the file in place only when something was removed.
 *
 * Files whose detected MIME type is not image/jpeg are left untouched.
 * Note that every APPn section is dropped, not just the Exif one.
 *
 * @param string $filename Path to the file to clean.
 *
 * @throws \InvalidArgumentException When the file content is too short to be a JPEG.
 * @throws \RuntimeException         When a JPEG file cannot be read or written back.
 */
function stripExif($filename): void
{
    $finfo = finfo_open(FILEINFO_MIME_TYPE);
    if ($finfo === false || finfo_file($finfo, $filename) !== 'image/jpeg') {
        return;
    }

    $original = file_get_contents($filename);
    if ($original === false) {
        throw new \RuntimeException("Unable to read {$filename}");
    }

    $stripped = [JpegSectionIterator::APPn, JpegSectionIterator::COM];
    $content = '';
    $changed = false;
    $iterator = new JpegSectionIterator($original);
    foreach ($iterator as $section) {
        // Skip application-specific sections (like Exif) and comments
        if (in_array($section['name'], $stripped, true)) {
            $changed = true;
            continue;
        }
        $content .= $iterator->currentSection();
    }

    // A silent write failure would leave the metadata in place unnoticed.
    if ($changed && file_put_contents($filename, $content) === false) {
        throw new \RuntimeException("Unable to write {$filename}");
    }
}
$ php artisan tinker
Psy Shell v0.10.9 (PHP 7.4.25 — cli) by Justin Hileman
>>> $iter = new JpegSectionIterator(file_get_contents('~/240666275_4734447076631485_5341300544913327001_n.jpg'))
=> JpegSectionIterator {#4179}
>>> foreach($iter as $key => $value) { echo sprintf("%08X %s %04X\n", $key, $value['name'], $value['word']); }
00000000 SOI FFD8
00000002 APPn FFE0
00000014 APPn FFED
00000092 DQT FFDB
000000D7 DQT FFDB
0000011C SOF2 FFC2
0000012F DHT FFC4
0000014D DHT FFC4
00000169 SOS FFDA
00001B28 DHT FFC4
00001B5D SOS FFDA
00004B99 DHT FFC4
00004BC4 SOS FFDA
00004EA2 DHT FFC4
00004ECD SOS FFDA
00005258 DHT FFC4
000052A3 SOS FFDA
000091EC DHT FFC4
00009215 SOS FFDA
0000E56F SOS FFDA
0000EA37 DHT FFC4
0000EA5C SOS FFDA
0000EBEE DHT FFC4
0000EC12 SOS FFDA
0000EE3E DHT FFC4
0000EE67 SOS FFDA
0001588B EOI FFD9
>>> q
Exit: Goodbye
@artoodetoo
Copy link
Author

artoodetoo commented Dec 15, 2021

The aim was to strip EXIF information without an Imagick (ImageMagick) dependency. I believe it can be useful for other purposes as well.

Some background: a JPEG file consists of sections. There is no "index" for navigating the file; it can only be read sequentially. Each section starts with a two-byte FFxx marker, followed by an optional payload length and the payload. The file starts with an "SOI" marker and ends with an "EOI" marker, neither of which has a payload.

The "SOS" section contains the graphical data, but its length field does NOT reflect the data length! It covers only the scan header. So we have to traverse the data to find the next FFxx marker. That is what the JpegSectionIterator class is for.

Thanks to JPEG Wiki and SO topics about file parsing.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment