Skip to content

Instantly share code, notes, and snippets.

@williamjacksn
Created February 9, 2011 21:11
Show Gist options
  • Save williamjacksn/819290 to your computer and use it in GitHub Desktop.
Save williamjacksn/819290 to your computer and use it in GitHub Desktop.
Get album art from ocremix.org
#!/usr/bin/perl
# ocremix.org album art get!
# (c) 2011 William Jackson <william@subtlecoolness.com>
# Download album art for a particular remix into the current working directory.
# Rename the album art to "folder.(jpg|gif|png)".
use strict;
use warnings;
use Cwd;
use LWP::Simple;
use HTML::TreeBuilder;
# Command line handling.
#   $ARGV[0]  numeric OC ReMix id (required); it is zero-padded to five
#             digits to build the remix page url.
#   $ARGV[1]  optional `-f` / `--force`; when given, existing
#             folder.(jpg|gif|png) files in the current working directory
#             are deleted before downloading.
# Anything other than a purely numeric first argument prints the usage text
# and exits with status 0.
unless (defined($ARGV[0]) && ($ARGV[0] =~ /^\d+$/)) {
    print "Use `ocr_album_art.pl <ocr_id> [-f]` to download album art into ",
        "this directory.\n";
    print "To report bugs or contact the author, leave a comment at ",
        "https://gist.github.com/819290\n";
    exit(0);
}
my $url = sprintf("http://ocremix.org/remix/OCR%05d/", $ARGV[0]);
my $force =
    (defined($ARGV[1]) && (($ARGV[1] eq "-f") || ($ARGV[1] eq "--force")))
    ? 1
    : 0;
# Check for the existence of folder.(jpg|gif|png) in
# the current working directory.
# Without --force an existing file aborts the run (exit status 1).
# With --force the file is deleted; a failed delete also aborts the run,
# because otherwise stale art would silently remain next to the new file.
foreach my $ext ("jpg", "gif", "png") {
    my $existing_art = cwd() . "/folder." . $ext;
    next unless -e $existing_art;
    unless ($force) {
        print STDERR "There is already a folder." . $ext .
            " in this directory.\n";
        print STDERR "Use `ocr_album_art.pl <ocr_id> -f` to overwrite " .
            "the album art in this directory.\n";
        exit(1);
    }
    unless (unlink($existing_art)) {
        print STDERR "Could not delete " . $existing_art . ": $!\n";
        exit(1);
    }
}
# Fetch the remix page. get() returns undef when the request fails, which
# is treated here as "the requested ocr_id does not exist".
my $html = get($url);
if (!defined($html)) {
    print STDERR "There is no information at this url: $url\n";
    print STDERR "Are you sure ", $ARGV[0], " is a valid ocr_id?\n";
    exit(1);
}
# Locate the album art on the remix page and download it as
# folder.<ext> in the current working directory.
# Exits with status 1 (and a message on STDERR) when the page does not have
# the expected layout or when the download fails; exits with 0 on success.
my $tree = HTML::TreeBuilder->new_from_content($html);
# The images I want are inside <div id="panel-main"> and always have
# the string "games" in the src attribute.
my $panel_main = $tree->look_down("id", "panel-main");
my $img;
if (defined($panel_main)) {
    $img = $panel_main->look_down("_tag", "img",
        sub {
            # Some <img> elements may have no src attribute at all.
            my $candidate = $_[0]->attr("src");
            return defined($candidate) && index($candidate, "games") > -1;
        });
}
my $img_src = defined($img) ? $img->attr("src") : undef;
# Only the src string is needed from here on, so recycle the tree now; this
# way every exit path below leaves nothing behind.
$tree->delete();
unless (defined($img_src)) {
    print STDERR "Could not find any album art at this url: $url\n";
    exit(1);
}
# The img src is usually something like:
# "/thumbs/180/files/images/games/sms/3/outrun-sms-title-1011.gif"
# I can safely remove everything before "files".
my $files_at = index($img_src, "files");
if ($files_at < 0) {
    print STDERR "Unexpected album art location: $img_src\n";
    exit(1);
}
my $src = "http://ocremix.org/" . substr($img_src, $files_at);
print " Source: " . $src . "\n";
# The extension is whatever follows the LAST dot of the final path segment,
# so file names that contain additional dots are handled correctly.
my ($ext) = $src =~ m{\.([^./]+)\z};
unless (defined($ext)) {
    print STDERR "Could not determine the image type of: $src\n";
    exit(1);
}
# Lower-case the extension so the result matches the folder.(jpg|gif|png)
# names that the overwrite check looks for.
$ext = lc($ext);
# Just in case I am downloading the art for "Knuckles' Chaotix" ...
# No, seriously, that is the only album that does not use .jpg, .gif, or .png.
if ($ext eq "jpeg") {
    $ext = "jpg";
}
my $dest = cwd() . "/folder." . $ext;
print "Destination: " . $dest . "\n";
# getstore() returns the HTTP status code; report anything but success.
my $http_response = getstore($src, $dest);
unless (is_success($http_response)) {
    print STDERR "Download failed with HTTP status $http_response: $src\n";
    exit(1);
}
exit(0);
#!/usr/bin/perl
# ocremix.org album art get (for Rainwave)!
# (c) 2011 William Jackson <william@subtlecoolness.com>
# Download album art for a bunch of games.
# Rename the album art to "folder.(jpg|gif|png)".
# With a directory structure such as "./Album1/Remix1.mp3", this should be run
# from ./ (the parent directory of each album directory).
use strict;
use warnings;
use Cwd;
use LWP::Simple;
use HTML::TreeBuilder;
# use MP3::Tag;
# Keep track of how this run went.
my $existing = 0;
my $fetch_error = 0;
my $success = 0;
# Get a list of directories: every entry of the current working directory
# that is itself a directory, except "." and "..".
# An unreadable working directory is fatal, since there is nothing to do.
my $scan_dir = cwd();
opendir(my $scan_dh, $scan_dir)
    or die "Cannot read directory $scan_dir: $!\n";
my @albumdirs =
    grep { $_ ne "." && $_ ne ".." && -d $_ } readdir($scan_dh);
closedir($scan_dh);
# Visit every album directory and download its album art.
# For each directory: skip it when folder.(jpg|gif|png) already exists, read
# the remix url from the "WWW" tag of the first .mp3 file, scrape the album
# art location from that page and store it as folder.<ext>.
# Updates the $existing, $fetch_error and $success counters.
#
# Remember where the run started so that every iteration can return to it
# with an absolute path. A relative chdir("..") would walk out of the root
# whenever entering an album directory failed or the directory is a symlink.
my $root_dir = cwd();
ALBUMDIR: foreach my $albumdir (@albumdirs) {
    # For each directory in the list, chdir into that directory.
    unless (chdir($albumdir)) {
        print STDERR "Cannot enter directory $albumdir: $!\n";
        next ALBUMDIR;
    }
    my $album_path = cwd();
    print " Directory: " . $album_path . "\n";
    # Check for the existence of "folder.(jpg|gif|png)" in
    # the current directory.
    foreach my $ext ("jpg", "gif", "png") {
        if (-e $album_path . "/folder." . $ext) {
            print STDERR " Existing: " . $album_path . "/folder." . $ext .
                "\n";
            $existing ++;
            next ALBUMDIR;
        }
    }
    # Get the first .mp3 file in the directory. Only regular files whose
    # name ENDS in .mp3 qualify (so "song.mp3.part" is not picked up).
    my $album_dh;
    unless (opendir($album_dh, $album_path)) {
        print STDERR "Cannot read directory $album_path: $!\n";
        next ALBUMDIR;
    }
    my $mp3file;
    while (defined(my $file = readdir($album_dh))) {
        if ($file =~ /\.mp3\z/i && -f $file) {
            $mp3file = $file;
            last;
        }
    }
    closedir($album_dh);
    # If there were no mp3 files in this directory,
    # move on to the next directory.
    unless (defined($mp3file)) {
        next ALBUMDIR;
    }
    # Get the "WWW" id3v2 tag for that .mp3 file.
    # The following (commented) code is for use with the MP3::Tag module.
    # my $mp3tag = MP3::Tag->new($mp3file);
    # $mp3tag->get_tags();
    # my ($info, undef) = $mp3tag->{ID3v2}->get_frame("WXXX");
    # my $url = $$info{"URL"} . "\n";
    #
    # tagset is run through a list-form pipe open, so the file name never
    # passes through a shell (spaces, quotes and metacharacters are safe).
    # The "./" prefix keeps a name starting with "-" from being read as an
    # option. The first output line containing "WWW" is used, minus its
    # first six characters (the tag label).
    my $url;
    if (open(my $tagset, "-|", "tagset", "./" . $mp3file)) {
        while (defined(my $line = <$tagset>)) {
            # Keep draining the output after a match so the child is not
            # cut off mid-write.
            next if defined($url);
            next unless index($line, "WWW") > -1;
            chomp($line);
            if (length($line) > 6) {
                $url = substr($line, 6);
                $url =~ s/^\s+|\s+$//g;
            }
        }
        close($tagset);
    }
    unless (defined($url) && length($url)) {
        print STDERR "Fetch Error: no WWW tag in $album_path/$mp3file\n";
        $fetch_error ++;
        next ALBUMDIR;
    }
    my $html = get($url);
    sleep(1);
    unless (defined($html)) {
        print STDERR "Fetch Error: $url\n";
        $fetch_error ++;
        next ALBUMDIR;
    }
    my $tree = HTML::TreeBuilder->new_from_content($html);
    # The images I want are inside <div id="panel-main"> and always have
    # the string "games" in the src attribute.
    my $panel_main = $tree->look_down("id", "panel-main");
    my $img;
    if (defined($panel_main)) {
        $img = $panel_main->look_down("_tag", "img",
            sub {
                # Some <img> elements may have no src attribute at all.
                my $candidate = $_[0]->attr("src");
                return defined($candidate)
                    && index($candidate, "games") > -1;
            });
    }
    my $img_src = defined($img) ? $img->attr("src") : undef;
    # Recycle the parsed HTML tree. Only the src string is needed from here
    # on, and doing it now covers every "next" below.
    $tree->delete();
    unless (defined($img_src)) {
        print STDERR "Fetch Error: no album art found at $url\n";
        $fetch_error ++;
        next ALBUMDIR;
    }
    # The img src is usually something like:
    # "/thumbs/180/files/images/games/sms/3/outrun-sms-title-1011.gif"
    # I can safely remove everything before "files".
    my $files_at = index($img_src, "files");
    if ($files_at < 0) {
        print STDERR "Fetch Error: unexpected album art location $img_src\n";
        $fetch_error ++;
        next ALBUMDIR;
    }
    my $src = "http://ocremix.org/" . substr($img_src, $files_at);
    print " Source: " . $src . "\n";
    # The extension is whatever follows the LAST dot of the final path
    # segment, so file names with additional dots are handled correctly.
    my ($ext) = $src =~ m{\.([^./]+)\z};
    unless (defined($ext)) {
        print STDERR "Fetch Error: cannot determine image type of $src\n";
        $fetch_error ++;
        next ALBUMDIR;
    }
    # Lower-case the extension so the result matches the
    # folder.(jpg|gif|png) names checked at the top of the loop.
    $ext = lc($ext);
    # Just in case I am downloading the art for "Knuckles' Chaotix" ...
    # No, seriously, that is the only album that does not use
    # .jpg, .gif, or .png.
    if ($ext eq "jpeg") {
        $ext = "jpg";
    }
    my $dest = $album_path . "/folder." . $ext;
    print "Destination: " . $dest . "\n";
    # Download the album art. Anything other than a success status counts
    # as a fetch error.
    my $http_response = getstore($src, $dest);
    sleep(1);
    if (is_success($http_response)) {
        $success ++;
    } else {
        print STDERR "Fetch Error: HTTP status $http_response for $src\n";
        $fetch_error ++;
    }
} continue {
    # Change back to the root directory.
    print "------\n";
    chdir($root_dir) or die "Cannot return to $root_dir: $!\n";
}
# Report the three run counters (downloaded, skipped because art already
# existed, failed) and finish with a zero exit status.
print " Success: $success\n",
    " Existing: $existing\n",
    "Fetch Error: $fetch_error\n";
exit(0);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment