Skip to content

Instantly share code, notes, and snippets.

@dMaggot
Created January 4, 2010 02:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dMaggot/268238 to your computer and use it in GitHub Desktop.
Save dMaggot/268238 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl -w
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
use strict;
use MP3::Tag;
use String::Approx;
use Text::LevenshteinXS;
use List::Util;
# Set MP3::Tag's global "write_v24" option before any tag is touched.
# NOTE(review): per the MP3::Tag documentation this permits writing ID3v2.4
# tags, which the module refuses to do by default - confirm this is intended.
MP3::Tag->config( write_v24 => 1 );
# askForName(@candidates)
#
# Prints a numbered menu of @candidates on STDOUT and reads the user's pick
# from STDIN until a whole number between 1 and scalar(@candidates) is
# entered.  Returns the chosen candidate.  If STDIN reaches end-of-file the
# first candidate is returned (with a warning) instead of prompting forever.
sub askForName
{
    my @candidates = @_;

    print "Choose the right name among\n";
    for my $i ( 0 .. $#candidates )
    {
        print "[" . ( $i + 1 ) . "] " . $candidates[$i] . "\n";
    }

    while (1)
    {
        print ": ";
        my $answer = <STDIN>;
        unless ( defined $answer )
        {
            warn "No more input; keeping \"" . $candidates[0] . "\"\n";
            return $candidates[0];
        }

        # Accept plain positive integers only; anything else (text, zero,
        # negative or fractional numbers) asks again.
        if ( $answer =~ /^\s*(\d+)\s*$/ && $1 >= 1 && $1 <= scalar(@candidates) )
        {
            return $candidates[ $1 - 1 ];
        }
    }
}

# fixNames(%clusters, $debug)
#
# Groups near-duplicate names together and picks one spelling per group.
#
#   %clusters  maps a lookup key to an array ref holding every spelling that
#              was filed under that key.  Because of the (\%$) prototype the
#              caller passes a real hash, which arrives here as a reference.
#   $debug     when true, each merge and each final group is printed to STDOUT.
#
# Two keys are merged when their Levenshtein distance is strictly less than
# 30% of the length of the shorter key.  Inside a (merged) group the spelling
# that occurs most often wins; when several spellings tie, the user is asked
# to choose on STDIN.
#
# Returns a hash (as a flat list) mapping every spelling seen to the spelling
# that should replace it.  The caller's hash and its arrays are not modified.
sub fixNames(\%$)
{
    my ( $clustersRef, $debug ) = @_;

    # Copy the arrays as well as the hash: merging pushes onto them, and the
    # caller's data must not grow as a side effect.
    my %nameClusters;
    foreach my $key ( keys %{$clustersRef} )
    {
        $nameClusters{$key} = [ @{ $clustersRef->{$key} } ];
    }

    my %namesMapping;
    my @sortedKeys = sort( keys %nameClusters );

    for ( my $i = 0 ; $i < scalar(@sortedKeys) ; $i++ )
    {
        my $j = $i + 1;
        while ( $j < scalar(@sortedKeys) )
        {
            my ( $keep, $candidate ) = @sortedKeys[ $i, $j ];
            my $distance = Text::LevenshteinXS::distance( $keep, $candidate );
            my $shortest = List::Util::min( length($keep), length($candidate) );

            if ( $distance < 0.3 * $shortest )
            {
                push( @{ $nameClusters{$keep} }, @{ $nameClusters{$candidate} } );
                print $candidate . " matches " . $keep . "\n" if $debug;
                delete $nameClusters{$candidate};

                # splice really removes the entry (delete on an array element
                # would leave an undef hole); the next key slides into slot
                # $j, so $j must not advance here.
                splice( @sortedKeys, $j, 1 );
            }
            else
            {
                $j++;
            }
        }
    }

    foreach my $clusterKey ( sort keys %nameClusters )
    {
        # Count how often each spelling occurs in this group.
        my %namesCounter;
        $namesCounter{$_}++ foreach @{ $nameClusters{$clusterKey} };
        next unless %namesCounter;    # empty group: nothing to map

        my $max = List::Util::max( values %namesCounter );

        # Sorted so the menu (and the EOF fallback) is the same on every run.
        my @nMaxes = sort grep { $namesCounter{$_} == $max } keys %namesCounter;

        my $replacement = $nMaxes[0];
        if ( scalar(@nMaxes) > 1 )
        {
            $replacement = askForName(@nMaxes);
        }

        if ($debug)
        {
            print "{" . join( ",", sort keys %namesCounter ) . "} -> " . $replacement . "\n";
        }

        foreach my $name ( keys %namesCounter )
        {
            $namesMapping{$name} = $replacement;
        }
    }

    return %namesMapping;
}
# ---------------------------------------------------------------------------
# Main script
#
# Works on every *.mp3 file in the current directory in three passes:
#   1. read artist and album from each file and cluster the artist spellings;
#   2. cluster the album spellings separately for each corrected artist;
#   3. write the corrected artist/album back and update the file's tags.
# ---------------------------------------------------------------------------

# normalizeName($name)
#
# Returns the clustering key for a tag value: the value upper-cased, with one
# leading "THE " removed.  An undefined value yields the empty string.
sub normalizeName
{
    my ($name) = @_;
    return '' unless defined $name;
    my $key = uc $name;
    $key =~ s/^THE //;
    return $key;
}

my %artistClusters;
my %artistAlbumClusters;
my %artistsMapping;
my %artistsAlbumsMapping;
my @mp3files = <*.mp3>;

# One record per readable file: { file, artist, album } exactly as stored in
# the tags, so later passes do not have to re-read them.
my @tracks;

# Pass 1: collect the tags and cluster artist spellings by normalized key.
foreach my $mp3file (@mp3files)
{
    my $thisTag = MP3::Tag->new($mp3file);

    # MP3::Tag->new gives back undef for something it cannot open (for
    # example a directory whose name ends in .mp3); skip instead of dying.
    unless ( defined $thisTag )
    {
        warn "Skipping $mp3file: cannot read its tags\n";
        next;
    }

    my $artist = $thisTag->artist();
    my $album  = $thisTag->album();
    $artist = '' unless defined $artist;
    $album  = '' unless defined $album;
    push( @tracks, { file => $mp3file, artist => $artist, album => $album } );

    my $artistKey = normalizeName($artist);
    if ( length($artistKey) > 0 )
    {
        push( @{ $artistClusters{$artistKey} }, $artist );
    }
}
%artistsMapping = fixNames( %artistClusters, 0 );

# Pass 2: cluster album spellings per corrected artist.
foreach my $track (@tracks)
{
    my $fixedArtist = $artistsMapping{ $track->{artist} };

    # Files without a usable artist are never given an album correction in
    # pass 3, so do not cluster (or ask the user about) their albums.
    next unless defined $fixedArtist;

    my $albumKey = normalizeName( $track->{album} );
    if ( length($albumKey) > 0 )
    {
        push( @{ $artistAlbumClusters{$fixedArtist}{$albumKey} }, $track->{album} );
    }
}
foreach my $artist ( keys %artistAlbumClusters )
{
    my %albumClusters = %{ $artistAlbumClusters{$artist} };
    %{ $artistsAlbumsMapping{$artist} } = fixNames( %albumClusters, 0 );
}

# Pass 3: write the corrections back.
foreach my $track (@tracks)
{
    my $thisTag = MP3::Tag->new( $track->{file} );
    unless ( defined $thisTag )
    {
        warn "Skipping $track->{file}: cannot reopen its tags\n";
        next;
    }

    my $fixedArtist = $artistsMapping{ $track->{artist} };

    # A missing mapping (empty artist, or one that normalizes to nothing such
    # as "The ") means "leave the tag alone" - never write undef over it.
    if ( defined $fixedArtist )
    {
        if ( $fixedArtist ne $track->{artist} )
        {
            $thisTag->artist_set($fixedArtist);
        }

        my $fixedAlbum = $artistsAlbumsMapping{$fixedArtist}{ $track->{album} };
        if ( defined $fixedAlbum && $fixedAlbum ne $track->{album} )
        {
            $thisTag->album_set($fixedAlbum);
        }
    }

    # Called for every readable file, changed or not, as before.
    $thisTag->update_tags( {} );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment