Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Created September 24, 2011 07:27
Show Gist options
  • Save ishiduca/1239076 to your computer and use it in GitHub Desktop.
Save ishiduca/1239076 to your computer and use it in GitHub Desktop.
COMIC ZIN検索の結果を ハッシュリファレンスで返す based AnyEvent
package AnyEvent::Search::Scrape::Zin;

use strict;
use warnings;
use utf8;

use Carp;
use Encode;
use AnyEvent;
use AnyEvent::HTTP;
use AnyEvent::Util ();    # new() calls AnyEvent::Util::guard by its full name
use URI::Escape;
use Web::Scraper;

our $VERSION = '0.01';

# Base URL of the shop; also passed to the scraper as the base URI.
my $home = 'http://shop.comiczin.jp';

# Product listing endpoint; new() appends the search query string to it.
my $search = "${home}/products/list.php";
# Construct a search client and immediately start the asynchronous search.
#
# Arguments (key/value pairs):
#   keyword  - search string (required); escaped with uri_escape_utf8 and
#              appended to the search URL as the "name" query parameter.
#   callback - code ref (required); called with the value returned by
#              _get_list() once a non-empty response body has arrived.
#   headers  - optional; passed through unchanged as http_get's "headers".
#   on_error - optional code ref; called with "<uri>: <status> <reason>"
#              when the response status is not 200.  When omitted, the
#              default handler dies with that message.
#
# Returns the blessed object.  Its "guard" slot holds an AnyEvent::Util
# guard that releases the self-referencing request closure when the object
# is destroyed.
#
# Croaks when "keyword" is undefined/empty or "callback" is missing.
sub new {
    my ($class, %args) = @_;

    # Test definedness and length rather than truth so that the keyword
    # "0" is accepted as a search term.
    (defined $args{keyword} && length $args{keyword})
        or Carp::croak qq(! failed: "keyword" parameter not found.);
    $args{callback}
        or Carp::croak qq(! failed: "callback" parameter not found.);

    my $uri = join '', $search, '?mode=search&name=',
        uri_escape_utf8($args{keyword});
    my $on_error = $args{on_error} || sub { die @_; };

    my $self = bless {}, $class;

    my $guard;
    $guard = sub {
        # Set once on_header has reported this attempt as failed.
        my $rejected = 0;

        http_get $uri,
            headers   => $args{headers},
            on_header => sub {
                my $hdrs = shift;
                return 1 if $hdrs->{Status} eq '200';

                # Flag first: the handler may die (the default one does).
                $rejected = 1;
                $on_error->("$uri: $hdrs->{Status} $hdrs->{Reason}");
                return;    # false => AnyEvent::HTTP cancels the download
            },
            sub {
                my ($body, $headers) = @_;

                # A rejected header still triggers this callback, with no
                # body.  The error was already reported, so do not retry
                # (that would loop forever on a persistent non-200 reply).
                return if $rejected;

                unless ($body) {
                    # Empty body: retry, unless the object was destroyed
                    # and the closure has been released in the meantime.
                    $guard->() if $guard;
                    return;
                }

                my $res = _get_list(decode_utf8 $body);
                $args{callback}->($res);
                return;
            };

        # Keep http_get in void context: in any other context it returns a
        # cancellation guard, and dropping that guard aborts the request.
        return;
    };

    # $guard refers to itself through the closure; clearing it when the
    # object goes away breaks the cycle.
    $self->{guard} = AnyEvent::Util::guard { undef $guard; };

    $guard->();

    return $self;
}
# Scrape a search-result page into a list of result records.
#
# $html is the page markup as an already-decoded character string.  $home is
# handed to the scraper as the base URI for the page.
#
# Returns the "list" entry of the scrape result: an array reference of hash
# references with the keys urlOfTitle, urlOfThumbnail, title and circle
# (undef when the scrape result has no "list" entry).  When scraping throws,
# a warning is issued via carp and nothing (undef in scalar context) is
# returned.
sub _get_list {
    my ($html) = @_;

    my $scraper = scraper {
        process '//form[@id="form1"]/ul/li/div/div[@class="data_area"]', 'list[]' => scraper {
            process '//a',     'urlOfTitle'     => [ '@href', sub { return $_->as_string; } ];
            process '//a/img', 'urlOfThumbnail' => [ '@src',  sub { return $_->as_string; } ];
            process '//a/img', 'title'          => '@alt';
            process '//p',     'circle'         => [ 'TEXT', sub {
                # split ' ' skips leading whitespace and collapses runs, so
                # the three trailing pops always hit real tokens and never
                # the empty fields that split /\s/ would produce.
                my @details = split ' ', $_;
                pop @details;    # drop the edition
                pop @details;    # drop the price
                pop @details;    # drop the author
                return join(' ', @details);
            } ];
        };
    };

    my $res;
    # Have eval return a true value on success instead of inspecting $@
    # afterwards, which can be reset before it is examined.
    my $ok = eval { $res = $scraper->scrape($html, $home); 1 };
    unless ($ok) {
        my $error = $@ || 'unknown error';
        Carp::carp qq(! failed: $error);
        return;
    }

    return $res->{list};
}
1;
__END__
=head1 NAME
AnyEvent::Search::Scrape::Zin - Comic ZIN Search interface for AnyEvent-based programs
=head1 SYNOPSIS
use utf8;
use AnyEvent::Search::Scrape::Zin;
use JSON;
my $cv = AnyEvent->condvar;
my $client = AnyEvent::Search::Scrape::Zin->new(
keyword => 'Ash wing',
callback => sub {
my $response = shift;
print encode_json $response;
$cv->send();
},
);
$cv->recv();
=head1 DESCRIPTION
AnyEvent::Search::Scrape::Zin provides a simple interface for AnyEvent-based programs.
It fetches the top search results from shop.comiczin.jp and passes them to the callback as a reference to an array of hash references.
=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment