Skip to content

Instantly share code, notes, and snippets.

@detain
Last active February 29, 2020 23:37
Show Gist options
  • Save detain/df1cb0cccc55a01e77bf928db7394b0b to your computer and use it in GitHub Desktop.
Save detain/df1cb0cccc55a01e77bf928db7394b0b to your computer and use it in GitHub Desktop.
Recursively iterates a directory finding MP3s and then loads the ID3 info via 'mediainfo' for each. It then builds a list of genre popularity by artist (how many songs for each artist use which genre), handling genre tags that are set to multiple genres separated by either a semicolon (;) or a forward slash (/).
<?php
/**
* Recursively iterates a directory finding MP3s and then loads the ID3 info via 'mediainfo' for each.
* It then builds a list of genre popularity by artist (how many songs for each artist use which genre),
* handling genre tags that are set to multiple genres separated by either a semicolon (;) or a forward slash (/).
*
* - Automatic Backups every 120 seconds (configurable)
* @see $backupSeconds
* - Intelligent reprocessing of MP3s, only updating when the "Last Modified Time" is newer than what we stored in the db
* - Command optimization drastically cutting back on the number of times the update command ("mp3info2") is run:
* say for 2500 files to update (originally 2500 calls to update them) it groups
* them into a single call per genre, so for me it went down from around 2500 to 80.
* - Automatic handling of removed files without having to specifically check for them.
*
* @author Joe Huss <detain@interserver.net>
* @copyright 2020
* *
*/
// ---------------------------------------------------------------------------
// Configuration and shared state
// ---------------------------------------------------------------------------
// Root directory that is scanned recursively for *.mp3 files.
$mp3Base = __DIR__;
// Number of files handled so far during the scan (used in progress output).
$found = 0;
// Genre values that are treated as placeholders rather than real genres.
$badGenres = ['Other', '!', 'Unknown', 'Miscellaneous'];
// Paths of MP3s whose genre tag should be rewritten.
$toFix = [];
// artist => [genre => song count]; later collapsed to artist => most frequent genre.
$artistCounts = [];
// Minimum number of seconds between intermediate saves of "mp3s.json" during the scan.
$backupSeconds = 120;

// ---------------------------------------------------------------------------
// Make sure the external tools are available before doing any work.
// "mediainfo" reads the tags; "mp3info2" is what the generated update
// commands call to write them.
// ---------------------------------------------------------------------------
exec('which mediainfo', $out, $missingMediaInfo);
unset($out);
exec('which mp3info2', $out, $missingMp3Info2);
unset($out);
// Any non-zero exit status means the lookup failed. Comparing against 0
// (instead of exactly 1) makes sure a message is always printed, e.g. when
// "which" itself is unavailable and the shell reports 127.
if ($missingMediaInfo !== 0 || $missingMp3Info2 !== 0) {
    if ($missingMediaInfo !== 0) {
        echo '[ERROR] Missing "mediainfo" ! Get it from https://mediaarea.net/en/MediaInfo or "apt install mediainfo"'.PHP_EOL;
    }
    if ($missingMp3Info2 !== 0) {
        echo '[ERROR] Missing "mp3info2" ! Get it from perl MP3::Tag module at https://metacpan.org/release/MP3-Tag or "apt install libmp3-tag-perl"'.PHP_EOL;
    }
    // Signal the failure to the calling shell instead of exiting with status 0.
    exit(1);
}
// ---------------------------------------------------------------------------
// Load the data stored by a previous run (path => media info), if any.
// A missing, unreadable or corrupt "mp3s.json" results in an empty array so
// the rest of the script can always rely on $mp3s being an array.
// ---------------------------------------------------------------------------
$mp3s = [];
if (file_exists('mp3s.json')) {
    // file_get_contents() returns false on failure; the cast turns that into
    // an empty string, which json_decode() rejects by returning null.
    $stored = json_decode((string) file_get_contents('mp3s.json'), true);
    if (is_array($stored)) {
        $mp3s = $stored;
        echo "Loaded Stored Data for ".count($mp3s)." MP3s\n";
    } else {
        echo '[WARNING] Could not parse "mp3s.json", starting without stored data'.PHP_EOL;
    }
}
// ---------------------------------------------------------------------------
// Scan $mp3Base for MP3 files and refresh the stored media info.
//
// For every file found, "mediainfo" is only run when there is no usable
// stored entry or the file was modified after the stored entry was created.
// Each processed entry is stamped with 'modified' (file mtime) and 'seen'
// (start time of this scan). The data is saved to "mp3s.json" at most every
// $backupSeconds seconds during the scan and once more at the end.
// ---------------------------------------------------------------------------

// Writes the given entries to "mp3s.json" and reports the result.
// Nothing is written when encoding fails, so an existing file is not
// truncated to an empty string.
$saveCache = function (array $entries) {
    $json = json_encode($entries, JSON_PRETTY_PRINT);
    if ($json === false) {
        echo '[ERROR] Could not encode the MP3 data as JSON: '.json_last_error_msg().PHP_EOL;
        return;
    }
    $bytes = file_put_contents('mp3s.json', $json);
    if ($bytes === false) {
        echo '[ERROR] Could not write "mp3s.json"'.PHP_EOL;
        return;
    }
    echo $bytes.' bytes written to "mp3s.json"'.PHP_EOL;
};

echo "Scanning Path '{$mp3Base}'...";
$cmd = 'find '.escapeshellarg($mp3Base).' -type f -name "*.mp3"';
$scanTime = time();
$lastBackup = $scanTime;
// shell_exec() returns null when the command produces no output; exploding
// an empty string would yield one bogus empty file name.
$listing = trim((string) shell_exec($cmd));
$files = $listing === '' ? [] : explode("\n", $listing);
$total = count($files);
echo $total.' files'.PHP_EOL;
foreach ($files as $file) {
    $found++;
    $modified = filemtime($file);
    if ($modified === false) {
        // The file vanished or is unreadable; nothing sensible can be stored.
        echo '[WARNING] Could not read the modification time of '.$file.', skipping it'.PHP_EOL;
        continue;
    }
    // Reuse the stored entry only when it holds media info and is not older
    // than the file on disk.
    $cacheIsFresh = isset($mp3s[$file]['media'], $mp3s[$file]['modified'])
        && $modified <= $mp3s[$file]['modified'];
    if ($cacheIsFresh) {
        // Start from the entry stored for THIS file. Without this the data of
        // the previously processed file would be saved under this path.
        $data = $mp3s[$file];
    } else {
        $cmd = 'mediainfo --Output=JSON '.escapeshellarg($file);
        $data = json_decode(trim((string) shell_exec($cmd)), true);
        //echo "[$found/$total] Loading Media Info for {$file}\n";
        if (!is_array($data)) {
            // Not stored, so the file is retried on the next run.
            echo '[WARNING] Could not load media info for '.$file.', skipping it'.PHP_EOL;
            continue;
        }
    }
    $data['modified'] = $modified;
    $data['seen'] = $scanTime;
    $mp3s[$file] = $data;
    if (time() - $lastBackup > $backupSeconds) {
        echo '['.$found.'/'.$total.'] Backup Interval triggered saving the current data...';
        $saveCache($mp3s);
        $lastBackup = time();
    }
}
// Drop entries that were not stamped during this scan: their files were
// removed (or could no longer be read), so they must not influence the genre
// statistics or end up in the generated update commands.
foreach ($mp3s as $path => $entry) {
    if (!is_array($entry) || !isset($entry['seen']) || $entry['seen'] !== $scanTime) {
        unset($mp3s[$path]);
    }
}
echo "File Info Loaded from ".count($mp3s)." MP3s\n";
$saveCache($mp3s);
// ---------------------------------------------------------------------------
// Tally, per artist, how often each genre is used and collect the files
// whose genre tag needs fixing.
//
// A file is added to $toFix when its genre tag is missing, is one of the
// placeholder values in $badGenres, or lists several genres separated by
// ';' or '/'. Files without a performer are ignored entirely.
// Only real (non-empty, non-placeholder) genres are counted, and an artist
// only gets an entry in $artistCounts once at least one such genre was found.
// ---------------------------------------------------------------------------
echo "Examining the MP3s for Genres and generating some mapping info".PHP_EOL;
foreach ($mp3s as $mp3 => $data) {
    if (!isset($data['media']['track']) || !is_array($data['media']['track'])) {
        // Entry without media info; nothing to examine.
        continue;
    }
    // Pick the "General" track; if there are several, the last one wins.
    $general = null;
    foreach ($data['media']['track'] as $track) {
        if (isset($track['@type']) && $track['@type'] === 'General') {
            $general = $track;
        }
    }
    if ($general === null || !isset($general['Performer'])) {
        continue;
    }
    if (!isset($general['Genre'])) {
        $toFix[] = $mp3;
        continue;
    }
    //echo "MP3: $mp3\n";
    //print_r($general);
    $artist = $general['Performer'];
    $rawGenre = $general['Genre'];
    // A tag containing either delimiter is a genre list and always needs fixing.
    $isGenreList = strpbrk($rawGenre, ';/') !== false;
    // Split on both delimiters at once so mixed lists such as "A;B/C" work,
    // and keep only the parts that are real genres.
    $genres = [];
    foreach (preg_split('/[;\/]/', $rawGenre) as $part) {
        $part = trim($part);
        if ($part !== '' && !in_array($part, $badGenres, true)) {
            $genres[] = $part;
        }
    }
    if ($isGenreList || count($genres) === 0) {
        $toFix[] = $mp3;
    }
    foreach ($genres as $genre) {
        if (!isset($artistCounts[$artist][$genre])) {
            $artistCounts[$artist][$genre] = 0;
        }
        $artistCounts[$artist][$genre]++;
    }
}
echo count($toFix)." mp3 files found with multiple genres to fix\n";
// ---------------------------------------------------------------------------
// Collapse $artistCounts from artist => [genre => count] to
// artist => most frequently used genre. When several genres share the
// highest count, the one that was encountered first is kept.
// ---------------------------------------------------------------------------
foreach ($artistCounts as $artist => $genres) {
    if (!is_array($genres) || count($genres) === 0) {
        // No genre was ever counted for this artist. Remove the artist so the
        // later array_key_exists() lookup skips its files instead of
        // assigning them a bogus (false) genre.
        unset($artistCounts[$artist]);
        continue;
    }
    $highestCount = max($genres);
    // array_search() returns the first matching key, which preserves the
    // "first genre to reach the maximum wins" rule.
    $highestGenre = array_search($highestCount, $genres, true);
    echo 'Found artist '.$artist.' to most likely be "'.$highestGenre.'" with a frequency of '.$highestCount.' among '.count($genres).' detected genres'.PHP_EOL;
    $artistCounts[$artist] = $highestGenre;
}
// ---------------------------------------------------------------------------
// Decide the new genre for every file queued in $toFix: the genre chosen for
// the file's performer in $artistCounts. Files whose performer has no entry
// there are left out. Afterwards $mp3s is replaced by the resulting
// path => genre map.
// ---------------------------------------------------------------------------
$updates = [];
foreach ($toFix as $path) {
    $tracks = $mp3s[$path]['media']['track'];
    // Map each track type to its position; a later track of the same type
    // replaces an earlier one.
    $positionByType = [];
    foreach ($tracks as $position => $trackInfo) {
        $positionByType[$trackInfo['@type']] = $position;
    }
    $performer = $tracks[$positionByType['General']]['Performer'];
    if (array_key_exists($performer, $artistCounts)) {
        $updates[$path] = $artistCounts[$performer];
    }
}
$mp3s = $updates;
// ---------------------------------------------------------------------------
// Group the pending updates ($mp3s is path => new genre at this point) by
// genre and write one "mp3info2 -g <genre> <files...>" command per genre to
// "update_mp3s.sh". The commands are only written, not executed.
// ---------------------------------------------------------------------------
echo "Grouping ".count($mp3s)." MP3s to update by Genres\n";
$genres = [];
$queued = 0;
foreach ($mp3s as $mp3 => $genre) {
    if ($genre === false || $genre === null || $genre === '') {
        // No genre could be determined for this file; never emit a command
        // that would overwrite its tag with an empty or bogus value.
        continue;
    }
    $genres[$genre][] = $mp3;
    $queued++;
}
echo "Sorted them down to ".count($genres)." genres to update\n";
$cmds = [];
foreach ($genres as $genre => $files) {
    // Array keys that look like integers are stored as int, hence the cast.
    $cmds[] = 'mp3info2 -g '.escapeshellarg((string) $genre)
        .' '.implode(' ', array_map('escapeshellarg', $files));
}
// End the script with a newline when there is at least one command.
$script = count($cmds) > 0 ? implode("\n", $cmds)."\n" : '';
if (file_put_contents('update_mp3s.sh', $script) === false) {
    echo '[ERROR] Could not write "update_mp3s.sh"'.PHP_EOL;
} else {
    // Report what actually happened: the tags are unchanged until the
    // generated script is run.
    echo 'Wrote '.count($cmds).' commands covering '.$queued.' MP3s to "update_mp3s.sh"; run it to apply the genre updates'."\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment