Skip to content

Instantly share code, notes, and snippets.

@sjardim
Forked from philipnorton42/Mp3.php
Last active August 7, 2021 10:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sjardim/32f182be2ab11a53b07633798ada7f04 to your computer and use it in GitHub Desktop.
Save sjardim/32f182be2ab11a53b07633798ada7f04 to your computer and use it in GitHub Desktop.
Mp3 data extractor. See https://www.hashbangcode.com/article/extracting-data-mp3-php for more information.
<?php
//Blog post: https://www.hashbangcode.com/article/extracting-data-mp3-php
//USAGE:
// Tested with ID3v2.2 and ID3v2.4 tags, using UTF-8 and UTF-16LE encodings.
// $file = 'audio.mp3';
// $mp3 = new Mp3($file);
// print_r($mp3->getTags());
// echo $mp3->getDuration()."\n";
/**
 * Reads ID3v2 tags and estimates the playing time of an MP3 file.
 *
 * The duration is obtained by walking every MPEG audio frame in the file and
 * summing (samples per frame / sample rate); no VBR/Xing header is consulted.
 * Tags are located by searching the ID3v2 block for a fixed set of frame ids.
 */
class Mp3 {

  /** Tags collected by readTags(), keyed by a human readable name. */
  protected $tags = [];

  /** MPEG version id (2 bits of the frame header) => version label. 'x' is reserved. */
  protected $versions = [
    0x0 => '2.5',
    0x1 => 'x',
    0x2 => '2',
    0x3 => '1',
  ];

  /** Layer id (2 bits of the frame header) => layer label. 'x' is reserved. */
  protected $layers = [
    0x0 => 'x',
    0x1 => '3',
    0x2 => '2',
    0x3 => '1',
  ];

  /** Bitrates in kbit/s, keyed by "V<version>L<layer>" and then by the 4-bit bitrate index. */
  protected $bitrates = [
    'V1L1' => [0,32,64,96,128,160,192,224,256,288,320,352,384,416,448],
    'V1L2' => [0,32,48,56, 64, 80, 96,112,128,160,192,224,256,320,384],
    'V1L3' => [0,32,40,48, 56, 64, 80, 96,112,128,160,192,224,256,320],
    'V2L1' => [0,32,48,56, 64, 80, 96,112,128,144,160,176,192,224,256],
    'V2L2' => [0, 8,16,24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160],
    'V2L3' => [0, 8,16,24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160],
  ];

  /** Sample rates in Hz, keyed by version label and then by the 2-bit sample rate index. */
  protected $samplerates = [
    '1' => [44100, 48000, 32000],
    '2' => [22050, 24000, 16000],
    '2.5' => [11025, 12000, 8000],
  ];

  /** Samples per frame, keyed by simplified version (1 or 2) and then by layer. */
  protected $samples = [
    1 => [1 => 384, 2 => 1152, 3 => 1152,],
    2 => [1 => 384, 2 => 1152, 3 => 576,],
  ];

  /** Multiplier applied to the elapsed seconds to build the keys of $data. */
  protected $factor = 10;

  /** Path of the file being inspected. */
  protected $filename;

  /** Five series of raw frame bytes recorded by readAudioData(), keyed by scaled time. */
  protected $data = [];

  /** Accumulated duration in seconds, filled in by readAudioData(). */
  protected $duration = 0;

  /**
   * @param string $filename
   *   Path of the MP3 file to inspect. The file is not opened until one of the
   *   read methods is called.
   */
  public function __construct($filename) {
    $this->filename = $filename;
  }

  /**
   * Walks the audio frames of the file and accumulates the total duration.
   *
   * Every call starts from zero, so calling it repeatedly does not inflate the
   * duration. Bytes that are not a valid frame header are skipped one at a
   * time, always moving forward, so the scan terminates on any input.
   *
   * @throws RuntimeException
   *   When the file cannot be opened.
   */
  public function readAudioData() {
    $fileHandle = $this->openFile();

    // Start from a clean slate so repeated calls do not accumulate.
    $this->duration = 0;
    $this->data = [];

    // Begin right after the ID3v2 tag, if there is one.
    $position = $this->headerOffset($fileHandle);

    while (fseek($fileHandle, $position, SEEK_SET) === 0) {
      // We nibble away at the file, 8 bytes at a time.
      $block = fread($fileHandle, 8);
      if ($block === false || strlen($block) < 8) {
        break;
      }

      $frame = $this->parseFrameHeader($block);
      if ($frame !== null) {
        // Add the playing time of this frame to the running total.
        $this->duration += $frame['samples'] / $frame['sampleRate'];
        $this->recordFrameBytes($fileHandle, $frame['size'] - 8);
        // The next frame starts exactly one frame length after this one.
        $position += $frame['size'];
      }
      elseif (substr($block, 0, 3) === 'TAG') {
        // An ID3v1 tag is a fixed 128 byte record; jump over it.
        $position += 128;
      }
      else {
        // Not a frame: resynchronise by moving forward a single byte.
        $position++;
      }
    }

    fclose($fileHandle);
  }

  /**
   * Returns the size in bytes of the ID3v2 tag at the start of the file.
   *
   * The handle is rewound to offset 0 before returning.
   *
   * @param resource $fileHandle
   *   An open, seekable file handle.
   *
   * @return int
   *   Header + tag body + optional footer length, or 0 when the file does not
   *   start with a well formed ID3v2 header.
   */
  public function headerOffset($fileHandle) {
    // Extract the first 10 bytes of the file and set the handle back to 0.
    fseek($fileHandle, 0);
    $block = fread($fileHandle, 10);
    fseek($fileHandle, 0);

    // Files shorter than an ID3v2 header cannot carry a tag.
    if ($block === false || strlen($block) < 10 || substr($block, 0, 3) !== 'ID3') {
      return 0;
    }

    // Bytes 3 and 4 hold the version and are not needed here; byte 5 holds the flags.
    $flagFooterPresent = (ord($block[5]) & 0x10) !== 0;

    // The size is a synchsafe integer: four bytes of 7 significant bits each.
    $tagSize = 0;
    for ($i = 6; $i <= 9; $i++) {
      $byte = ord($block[$i]);
      if ($byte & 0x80) {
        // A set high bit means this is not a synchsafe integer.
        return 0;
      }
      $tagSize = ($tagSize << 7) | $byte;
    }

    $headerSize = 10;
    $footerSize = $flagFooterPresent ? 10 : 0;
    return $headerSize + $tagSize + $footerSize;
  }

  /**
   * Reads the ID3v2 tag of the file into $this->tags.
   *
   * 'FileName' is always set. When an ID3v2 tag is present 'TAG' and 'Version'
   * ("major.revision") are added, followed by whichever of Title, Album,
   * Author, Track, Year, Length and Lyric are found. Text values are returned
   * as UTF-8 when a converter (mbstring or iconv) is available.
   *
   * @throws RuntimeException
   *   When the file cannot be opened.
   */
  public function readTags() {
    $fileHandle = $this->openFile();
    $headerOffset = $this->headerOffset($fileHandle);
    // fread() rejects a zero length, so only read when there is a tag.
    $binary = ($headerOffset > 0) ? fread($fileHandle, $headerOffset) : '';
    fclose($fileHandle);

    $this->tags['FileName'] = $this->filename;
    if ($binary === false || strlen($binary) < 10 || substr($binary, 0, 3) !== 'ID3') {
      return;
    }

    // ID3 tags detected.
    $major = ord($binary[3]);
    $this->tags['TAG'] = 'ID3';
    $this->tags['Version'] = $major . '.' . ord($binary[4]);

    if ($major === 2) {
      // ID3v2.2 uses three character frame ids.
      $frames = [
        'TT2' => 'Title',
        'TAL' => 'Album',
        'TP1' => 'Author',
        'TRK' => 'Track',
        'TYE' => 'Year',
        'TLE' => 'Length',
        'ULT' => 'Lyric',
      ];
    }
    elseif ($major === 3 || $major === 4) {
      $frames = [
        'TIT2' => 'Title',
        'TALB' => 'Album',
        'TPE1' => 'Author',
        'TRCK' => 'Track',
        'TYER' => 'Year',
        'TLEN' => 'Length',
        'USLT' => 'Lyric',
      ];
    }
    else {
      return;
    }

    // Look for each frame within the tag data.
    foreach ($frames as $id => $name) {
      $data = $this->findFrameData($binary, $id, $major);
      if ($data !== null) {
        $this->tags[$name] = $this->decodeFrameText($data, $name === 'Lyric');
      }
    }
  }

  /**
   * Guesses a text encoding from a leading byte order mark.
   *
   * Not used by readTags(), which relies on the ID3 encoding byte instead; kept
   * as a protected helper. Safe to call with strings of any length.
   *
   * @param string $str
   *   Raw bytes to inspect.
   *
   * @return string
   *   'UTF-8', 'UTF-32BE', 'UTF-32LE', 'UTF-16LE', 'UTF-16BE' or "can't detect".
   */
  protected function detect_bom_encoding($str) {
    $str = (string) $str;
    if (strncmp($str, "\xEF\xBB\xBF", 3) === 0) {
      return 'UTF-8';
    }
    if (strncmp($str, "\x00\x00\xFE\xFF", 4) === 0) {
      return 'UTF-32BE';
    }
    if (strncmp($str, "\xFF\xFE", 2) === 0) {
      // FF FE followed by two zero bytes is the UTF-32LE mark.
      return (substr($str, 2, 2) === "\x00\x00") ? 'UTF-32LE' : 'UTF-16LE';
    }
    if (strncmp($str, "\xFE\xFF", 2) === 0) {
      return 'UTF-16BE';
    }
    return "can't detect";
  }

  /**
   * Formats a number of seconds as "MM:SS", or "HH:MM:SS" from one hour up.
   *
   * @param int|float $duration
   *   Duration in seconds; fractions of a second are dropped.
   *
   * @return string
   */
  public function formatTime($duration) {
    $hours = floor($duration / 3600);
    $minutes = floor(($duration - ($hours * 3600)) / 60);
    $seconds = $duration - ($hours * 3600) - ($minutes * 60);
    if ($hours > 0) {
      return sprintf("%02d:%02d:%02d", $hours, $minutes, $seconds);
    }
    return sprintf("%02d:%02d", $minutes, $seconds);
  }

  /**
   * Reads the tags from the file and returns them.
   *
   * @return array
   *   See readTags() for the keys.
   */
  public function getTags() {
    $this->readTags();
    return $this->tags;
  }

  /**
   * Scans the audio frames and returns the formatted duration.
   *
   * @return string
   *   See formatTime().
   */
  public function getDuration() {
    $this->readAudioData();
    return $this->formatTime($this->duration);
  }

  /**
   * Opens the file for binary reading.
   *
   * @return resource
   *
   * @throws RuntimeException
   *   When the file cannot be opened.
   */
  protected function openFile() {
    $fileHandle = fopen($this->filename, 'rb');
    if ($fileHandle === false) {
      throw new RuntimeException(sprintf('Unable to open "%s" for reading.', $this->filename));
    }
    return $fileHandle;
  }

  /**
   * Decodes an MPEG audio frame header.
   *
   * @param string $block
   *   At least the first four bytes of a candidate frame.
   *
   * @return array|null
   *   ['size' => frame length in bytes, 'samples' => samples per frame,
   *   'sampleRate' => Hz], or NULL when the bytes are not a usable header
   *   (no frame sync, reserved version/layer/sample rate, free or bad bitrate).
   */
  protected function parseFrameHeader($block) {
    if (strlen($block) < 4) {
      return null;
    }

    // Frame sync: all of the first eleven bits must be set.
    $b1 = ord($block[1]);
    if ($block[0] !== "\xff" || ($b1 & 0xe0) !== 0xe0) {
      return null;
    }
    $b2 = ord($block[2]);

    $version = $this->versions[($b1 & 0x18) >> 3];
    $layer = $this->layers[($b1 & 0x06) >> 1];
    if ($version === 'x' || $layer === 'x') {
      return null;
    }
    // MPEG 2 and 2.5 share the bitrate and samples-per-frame tables.
    $simpleVersion = ($version === '1') ? 1 : 2;
    $layer = (int) $layer;

    $bitrateKey = 'V' . $simpleVersion . 'L' . $layer;
    $bitrateId = ($b2 & 0xf0) >> 4;
    $sampleRateId = ($b2 & 0x0c) >> 2;
    if (!isset($this->bitrates[$bitrateKey][$bitrateId], $this->samplerates[$version][$sampleRateId], $this->samples[$simpleVersion][$layer])) {
      return null;
    }

    $bitrate = $this->bitrates[$bitrateKey][$bitrateId];
    $sampleRate = $this->samplerates[$version][$sampleRateId];
    $frameSamples = $this->samples[$simpleVersion][$layer];
    if ($bitrate <= 0 || $sampleRate <= 0) {
      // Index 0 is the "free" bitrate: the frame length cannot be derived.
      return null;
    }

    $paddingBit = ($b2 & 0x02) >> 1;
    if ($layer === 1) {
      // Layer I frames are counted in 4 byte slots.
      $framesize = ((int) floor(12 * $bitrate * 1000 / $sampleRate) + $paddingBit) * 4;
    }
    else {
      // Layer II and III: samples / 8 gives 144, or 72 for MPEG 2/2.5 Layer III.
      $framesize = (int) floor(($frameSamples / 8) * $bitrate * 1000 / $sampleRate) + $paddingBit;
    }

    return [
      'size' => $framesize,
      'samples' => $frameSamples,
      'sampleRate' => $sampleRate,
    ];
  }

  /**
   * Stores five bytes of the current frame in $this->data.
   *
   * The handle must be positioned 8 bytes into the frame. Bytes that cannot be
   * read (truncated file) are recorded as 0.
   *
   * @param resource $fileHandle
   *   Handle positioned just after the 8 byte block that held the header.
   * @param int $available
   *   Number of bytes of this frame that follow the current position.
   */
  protected function recordFrameBytes($fileHandle, $available) {
    $offsets = [0, 2, 9, 16, 23];
    $wanted = min(24, $available);
    $frameData = ($wanted > 0) ? fread($fileHandle, $wanted) : '';
    if ($frameData === false) {
      $frameData = '';
    }

    // Array keys must be integers; truncate explicitly.
    $key = (int) ($this->duration * $this->factor);
    foreach ($offsets as $series => $offset) {
      $this->data[$series][$key] = isset($frameData[$offset]) ? ord($frameData[$offset]) : 0;
    }
  }

  /**
   * Locates a frame inside the raw ID3v2 block and returns its payload.
   *
   * The frame is found by searching for its id followed by a zero byte, so
   * only frames whose size has a zero most significant byte are matched.
   *
   * @param string $binary
   *   The raw ID3v2 block, starting with "ID3".
   * @param string $id
   *   Frame id: three characters for ID3v2.2, four otherwise.
   * @param int $major
   *   ID3v2 major version (2, 3 or 4).
   *
   * @return string|null
   *   The frame payload, or NULL when the frame is absent or cut short.
   */
  protected function findFrameData($binary, $id, $major) {
    $pos = strpos($binary, $id . "\0");
    if ($pos === false) {
      return null;
    }

    if ($major === 2) {
      // 3 byte id followed by a 3 byte size.
      $sizeLength = 3;
      $headerLength = 6;
    }
    else {
      // 4 byte id, 4 byte size, 2 bytes of flags.
      $sizeLength = 4;
      $headerLength = 10;
    }

    $sizeBytes = (string) substr($binary, $pos + strlen($id), $sizeLength);
    if (strlen($sizeBytes) < $sizeLength) {
      return null;
    }
    $bytes = array_values(unpack('C*', $sizeBytes));

    // ID3v2.4 sizes are synchsafe (7 bits per byte). Fall back to plain
    // big-endian when a high bit is set, which a synchsafe value never has.
    $shift = ($major === 4 && max($bytes) < 0x80) ? 7 : 8;
    $length = 0;
    foreach ($bytes as $byte) {
      $length = ($length << $shift) | $byte;
    }

    return (string) substr($binary, $pos + $headerLength, $length);
  }

  /**
   * Converts the payload of a text or lyrics frame to a clean string.
   *
   * The first byte selects the encoding (0 ISO-8859-1, 1 UTF-16 with BOM,
   * 2 UTF-16BE, 3 UTF-8). Carriage returns become "\n" and the remaining
   * control characters, including NUL terminators, are removed.
   *
   * @param string $data
   *   Raw frame payload.
   * @param bool $isLyrics
   *   TRUE for ULT/USLT frames, whose text is preceded by a language code and
   *   a content descriptor; both are skipped.
   *
   * @return string
   */
  protected function decodeFrameText($data, $isLyrics = false) {
    $data = (string) $data;
    if ($data === '') {
      return '';
    }

    $encoding = ord($data[0]);
    if ($encoding <= 3) {
      $data = (string) substr($data, 1);
      if ($isLyrics) {
        $data = $this->skipLyricsPreamble($data, $encoding === 1 || $encoding === 2);
      }
    }
    else {
      // No recognisable encoding marker: treat every byte as ISO-8859-1 text.
      $encoding = 0;
    }

    switch ($encoding) {
      case 1:
        // UTF-16 with a byte order mark; assume little-endian without one.
        $charset = 'UTF-16LE';
        $bom = substr($data, 0, 2);
        if ($bom === "\xFF\xFE" || $bom === "\xFE\xFF") {
          $charset = ($bom === "\xFE\xFF") ? 'UTF-16BE' : 'UTF-16LE';
          $data = (string) substr($data, 2);
        }
        $text = $this->toUtf8($data, $charset);
        break;

      case 2:
        $text = $this->toUtf8($data, 'UTF-16BE');
        break;

      case 3:
        $text = $data;
        break;

      default:
        $text = $this->toUtf8($data, 'ISO-8859-1');
    }

    $text = str_replace(["\r\n", "\r"], "\n", $text);
    // Byte-wise on purpose: none of these bytes occur inside a UTF-8 sequence.
    return (string) preg_replace('/[\x00-\x09\x0B-\x1F\x7F]/', '', $text);
  }

  /**
   * Drops the language code and content descriptor of a lyrics frame.
   *
   * @param string $data
   *   Frame payload without its encoding byte.
   * @param bool $wide
   *   TRUE when the descriptor uses a two byte encoding, in which case its
   *   terminator is two zero bytes on an even offset.
   *
   * @return string
   *   The bytes after the descriptor terminator, or everything after the
   *   language code when no terminator is found.
   */
  protected function skipLyricsPreamble($data, $wide) {
    // Three byte language code.
    $data = (string) substr($data, 3);

    if ($wide) {
      $length = strlen($data);
      for ($i = 0; $i + 1 < $length; $i += 2) {
        if ($data[$i] === "\0" && $data[$i + 1] === "\0") {
          return (string) substr($data, $i + 2);
        }
      }
      return $data;
    }

    $end = strpos($data, "\0");
    return ($end === false) ? $data : (string) substr($data, $end + 1);
  }

  /**
   * Converts bytes in the given charset to UTF-8.
   *
   * Uses mbstring, then iconv. When neither is available the printable ASCII
   * bytes (plus CR and LF) are kept and everything else is dropped.
   *
   * @param string $bytes
   * @param string $charset
   *   'ISO-8859-1', 'UTF-16LE' or 'UTF-16BE'.
   *
   * @return string
   */
  protected function toUtf8($bytes, $charset) {
    if ($charset !== 'ISO-8859-1' && strlen($bytes) % 2 === 1) {
      // A dangling byte cannot be half of a UTF-16 code unit.
      $bytes = (string) substr($bytes, 0, -1);
    }
    if ($bytes === '') {
      return '';
    }

    if (function_exists('mb_convert_encoding')) {
      return (string) mb_convert_encoding($bytes, 'UTF-8', $charset);
    }
    if (function_exists('iconv')) {
      $converted = iconv($charset, 'UTF-8//IGNORE', $bytes);
      if ($converted !== false) {
        return $converted;
      }
    }
    return (string) preg_replace('/[^\x0A\x0D\x20-\x7E]/', '', $bytes);
  }

}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment