Skip to content

Instantly share code, notes, and snippets.

@skaji
Created August 21, 2012 17:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skaji/3417549 to your computer and use it in GitHub Desktop.
Save skaji/3417549 to your computer and use it in GitHub Desktop.
Image Download
#!/usr/bin/env perl
use strict;
use warnings;
use Encode qw{encode_utf8 decode_utf8};
use utf8;
use LWP::UserAgent;
use HTTP::Request;
use Digest::MD5 qw/md5_hex/;
use Path::Class qw/dir file/;
use URI;
use JSON qw{decode_json};
use AnyEvent;
use AnyEvent::HTTP;
use Term::ANSIColor;
# ---------------------------------------------------------------------------
# Main script: page through the Bing image-search API for a query and fetch
# every not-yet-saved .jpg result into ./data, one result page at a time.
#
# Usage: <script> [QUERY]        (QUERY defaults to 'AKB48')
# ---------------------------------------------------------------------------
my $query       = @ARGV ? $ARGV[0] : 'AKB48';
my $account_key = 'ACCOUNT KEY';    # NOTE(review): looks like a placeholder - set a real key
my $url = URI->new('https://api.datamarket.azure.com/Bing/Search/Image');
my $ua  = LWP::UserAgent->new;
my $page_count     = 0;             # result pages processed so far
my $download_count = 0;             # downloads scheduled so far, across all pages
my $dir      = dir("./data");
my $interval = 50;                  # results requested per page ($top)

$dir->mkpath if ! -d $dir;
STDOUT->autoflush(1);

PAGE:
while (1) {
    my $skip = $page_count * $interval;
    $url->query_form(
        'Query'   => qq{'$query'},
        '$top'    => $interval,
        '$skip'   => $skip,
        '$format' => 'json',
    );

    my $req = HTTP::Request->new(GET => $url);
    $req->authorization_basic('', $account_key);
    my $res = $ua->request($req);
    die $res->status_line if !$res->is_success;

    my $json    = decode_json($res->content);
    my $results = $json->{d}{results};

    # Stop when the result list is missing, is not a list, or is empty.
    # Checking only for definedness is not enough: a defined-but-empty list
    # schedules no downloads and would never end the loop, so the script
    # would keep issuing requests forever.
    last PAGE if ref($results) ne 'ARRAY' || !@{$results};

    my $cv = AnyEvent->condvar;

    # Safety net: after 60 seconds call send on the condvar so that recv
    # below returns even if some downloads have not finished. The timer
    # stays active only while $stopper is in scope (this iteration).
    my $stopper = AnyEvent->timer(
        after => 60,
        cb    => sub { $cv->send },
    );

    my $begin_download_count = $download_count;

    ENTRY:
    for my $entry (@{$results}) {
        my $media_url = $entry->{MediaUrl};

        # Skip entries without a URL (avoids an "uninitialized" warning)
        # and anything whose URL does not end in .jpg.
        next ENTRY unless defined $media_url && $media_url =~ /\.jpg$/;

        # The file name is derived from the URL, so a URL that was already
        # saved on an earlier page or run maps to the same file and is skipped.
        my $filename = md5_hex(encode_utf8($media_url)) . '.jpg';
        my $filepath = $dir->file($filename);
        next ENTRY if -f $filepath;

        $download_count++;
        http_get_event($cv, $media_url, $filepath, $download_count);
    }

    # Only wait when this page actually scheduled something.
    $cv->recv if $download_count != $begin_download_count;
    print colored("[$download_count done]\n", 'green');
    $page_count++;
}
# http_get_event($cv, $media_url, $filepath, $download_count)
#
# Start an asynchronous GET of $media_url. When the response has a 2xx
# status and a content type beginning with "image", write the body to
# $filepath and print a progress line.
#
#   $cv             - condvar-like object; begin is called before the
#                     request starts and end is called exactly once when the
#                     response callback runs, whatever the outcome
#   $media_url      - URL to fetch
#   $filepath       - destination path for the downloaded body
#   $download_count - sequence number, used only in the printed messages
#
# Write failures are reported with warn instead of die. Dying inside the
# response callback would skip $cv->end and leave the condvar's begin/end
# counter unbalanced, so a waiter would only be released by its own timeout.
sub http_get_event {
    my ($cv, $media_url, $filepath, $download_count) = @_;
    $cv->begin;
    http_get($media_url, sub {
        my ($body, $header) = @_;

        # Either header may be absent; default to '' so the matches below
        # do not emit "uninitialized value" warnings.
        my $status = defined $header->{Status} ? $header->{Status} : '';
        my $content_type
            = defined $header->{'content-type'} ? $header->{'content-type'} : '';

        if ($status =~ /\A2/ && $content_type =~ m{\Aimage}) {
            my $error;
            if (open my $fh, '>:raw', $filepath) {
                print {$fh} $body or $error = "$!";

                # Buffered write errors can surface only at close time.
                if (!close($fh) && !defined $error) {
                    $error = "$!";
                }

                # Do not leave a truncated file behind: a later existence
                # check on $filepath would treat it as already downloaded.
                unlink $filepath if defined $error;
            }
            else {
                $error = "$!";
            }

            if (defined $error) {
                warn "$download_count:failed to save $media_url to $filepath: $error\n";
            }
            else {
                print "$download_count:finished $media_url\n";
            }
        }

        # Always balance the begin above, on success and on every failure.
        $cv->end;
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment