Skip to content

Instantly share code, notes, and snippets.

@petersohn
Created February 23, 2014 08:51
Show Gist options
  • Save petersohn/9168879 to your computer and use it in GitHub Desktop.
Save petersohn/9168879 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
# Download the comic images linked from the giantitp.com "oots" index page
# into the current directory, naming each file "<number> - <title>.<ext>".
#
# A comic is skipped when any file whose name starts with its number already
# exists.  All wget output goes to download.log.  The run stops at the first
# page or image that fails to download.
use strict;
use warnings;

my $index_url = 'http://www.giantitp.com/comics/oots.html';

# -o truncates download.log for this run; later wget calls append with -a.
my $titlepage = `wget -O - -o download.log $index_url`;
if ($? != 0) {
    # Without the index there is nothing to do; fail loudly instead of
    # silently finishing with zero comics.
    die "Error retrieving page: $index_url\n";
}

# Map comic number (as it appears in the URL) => comic title.
my %titles;
while ($titlepage =~ m'<A href="/comics/oots(\d+)\.html">([^<]*)</A>'sg) {
    my ($num, $title) = ($1, $2);
    print "$num $title\n";
    $titles{$num} = $title;
}

# Numeric order, so the result is right even if numbers are not zero-padded;
# the string comparison only breaks ties between numerically equal keys.
COMIC:
foreach my $num (sort { $a <=> $b or $a cmp $b } keys %titles) {
    # Any file starting with the comic number counts as already downloaded.
    my @existing_files = glob "$num*";
    next COMIC if @existing_files;

    my $title = $titles{$num};

    # $num matched \d+ above, so interpolating it into a shell command is safe.
    my $pagename = "http://www.giantitp.com/comics/oots${num}.html";
    my $page = `wget -O - -a download.log $pagename`;
    if ($? != 0) {
        print "Error retrieving page: $pagename\n";
        last COMIC;
    }

    # Split the image path into stem and extension so the extension can be
    # reused for the local file name.
    if (my ($stem, $ext) = $page =~ m'<TD align="center"><IMG src="/comics/images/([^"]+)\.([^".]+)"></TD>'s) {
        my $remotefn = "$stem.$ext";

        # Pass the extension as an argument rather than as part of the format,
        # so a '%' in remote data is never interpreted as a directive.
        my $fn = sprintf '%s - %s.%s', $num, $title, $ext;

        # A title may contain '/', which cannot appear in a file name.
        $fn =~ s!/!_!g;

        print "$fn";
        if (!-f $fn) {
            print " (downloading)";
            my $imagename = "http://www.giantitp.com/comics/images/$remotefn";

            # List-form system() bypasses the shell: no quoting of $fn is
            # needed (so -f and unlink see the real file name), and text taken
            # from the remote page cannot inject shell commands.
            my $ret = system('wget', '-O', $fn, '-a', 'download.log', $imagename);
            if ($ret != 0) {
                print "Error retrieving page: $imagename\n";
                # wget -O may leave an empty or partial file behind.
                unlink $fn;
                last COMIC;
            }
        }
        print "\n";
    }
    else {
        print "Image not found.\n";
    }
}
print "\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment