Skip to content

Instantly share code, notes, and snippets.

@ainvyu
Created September 29, 2013 18:25
Show Gist options
  • Save ainvyu/6755147 to your computer and use it in GitHub Desktop.
Save ainvyu/6755147 to your computer and use it in GitHub Desktop.
Game Developer Magazine Digital Edition Archives downloader
#!/usr/bin/env perl
use common::sense;
use Web::Query;
use Readonly;
use Coro;
use Coro::LWP;
use Coro::Semaphore;
use WWW::Mechanize;
# Index page that lists every Game Developer Magazine issue, grouped by year.
Readonly my $target_url => 'http://www.gdcvault.com/gdmag';

# Year heading most recently seen while walking the table rows.
my $cur_year;

# One hashref per issue row: { year => ..., text => ..., url => ... }.
my @url_info;

# Captures the first non-blank line of a string without its surrounding
# whitespace. Unlike \S.*\S it also accepts a one-character value.
my $trimmed_line = qr{(\S(?:.*\S)?)};

# NOTE(review): new_from_url is expected to return a false value when the
# page cannot be fetched -- confirm against the installed Web::Query.
# Checking here gives a clear error instead of a method call on undef.
my $q = Web::Query->new_from_url($target_url)
    or die "cannot fetch index page $target_url\n";

$q->find("div.gnrpage tr")->each( sub {
    my ($i, $elem) = @_;

    # attr() and text() are always assigned to plain scalars below. That
    # forces scalar context, so a row with zero or several matching elements
    # can never add or drop elements in the hash built at the end.
    my $bgcolor = $elem->attr("bgcolor");

    # Rows without a bgcolor attribute are treated as year headings.
    if (!defined $bgcolor || $bgcolor eq '') {
        my $heading = $elem->find("h2")->text;
        my ($year) = defined $heading ? $heading =~ $trimmed_line : ();

        # A heading-less row (spacer, table header) must not wipe out the
        # year that the following issue rows still belong to.
        $cur_year = $year if defined $year;
        return;
    }

    my $cell_text = $elem->find("td")->text;
    my $href      = $elem->find("a")->attr("href");
    my ($text)    = defined $cell_text ? $cell_text =~ $trimmed_line : ();

    # Without a title or a link there is nothing to download or to name.
    if (!defined $text || !defined $href || $href eq '') {
        warn "skip row $i: missing title or download link\n";
        return;
    }

    push @url_info, {
        year => $cur_year,
        text => $text,
        url  => $href,
    };
});

# Number of issues found on the index page.
print scalar(@url_info)."\n";
# At most 16 downloads hold a semaphore slot at any time.
my $sem = Coro::Semaphore->new(16);

# Upper bound on fetch attempts per file, so a dead link cannot spin forever.
Readonly my $max_attempts => 5;

# One coro per entry in @url_info; each downloads a single issue into
# "<year>/<title>.<ext>" and skips files that already exist.
my @coros;
for my $dl (@url_info) {
    push @coros, async {
        my $guard = $sem->guard;    # slot is released when $guard is destroyed

        # autocheck is switched off so that a failed get() returns the
        # response object and the retry loop below can inspect it. With the
        # default (autocheck on) get() dies on an HTTP error, and a die
        # inside a coro presumably ends the whole program -- TODO confirm.
        my $mech = WWW::Mechanize->new(autocheck => 0);

        my $url = $dl->{url};
        if (!defined $url || $url eq '') {
            warn "skip entry without a download url\n";
            return;
        }

        # A slash in the title would otherwise be taken as a directory.
        $dl->{text} =~ s{/}{-}g;

        # An entry seen before any year heading must not end up in "/".
        my $year_dir = (defined $dl->{year} && length $dl->{year})
                     ? $dl->{year}
                     : 'unknown';

        # Extension = text after the last dot of the final path segment,
        # ignoring any query string or fragment.
        my ($file_ext) = $url =~ m{\.([^./?#]+)(?:[?#].*)?\z};
        my $save_path = $year_dir.'/'.$dl->{text};
        $save_path .= '.'.$file_ext if defined $file_ext;

        return if -e $save_path;

        print "get url: ".$url."\n";
        print "save path: $save_path"."\n";

        my $res;
        for my $attempt (1 .. $max_attempts) {
            $res = $mech->get($url);
            last if $res->is_success;
            print "fail! retry."."\n" if $attempt < $max_attempts;
        }
        if (!$res->is_success) {
            warn "giving up on $url after $max_attempts attempts: "
                .$res->status_line."\n";
            return;
        }

        print "success! try save file: $save_path"."\n";
        if (!-d $year_dir && !mkdir($year_dir)) {
            warn "cannot create directory $year_dir: $!\n";
            return;
        }

        # Write to a temporary name and rename once complete, so that an
        # interrupted run never leaves a truncated file which the -e check
        # above would skip on the next run.
        my $part_path = $save_path.'.part';
        my $save_file;
        if (!open($save_file, '>', $part_path)) {
            warn "cannot open $part_path: $!\n";
            return;
        }
        binmode $save_file;
        my $written = print {$save_file} $res->content;
        my $closed  = close $save_file;    # buffered write errors surface here
        if (!$written || !$closed) {
            warn "cannot write $part_path: $!\n";
            unlink $part_path;
            return;
        }
        if (!rename($part_path, $save_path)) {
            warn "cannot rename $part_path to $save_path: $!\n";
            unlink $part_path;
        }
        return;
    };
}

# Wait for every download coro to finish before the script exits.
$_->join for @coros;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment