Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Created September 22, 2011 12:16
Show Gist options
  • Save ishiduca/1234639 to your computer and use it in GitHub Desktop.
Save ishiduca/1234639 to your computer and use it in GitHub Desktop.
とらのあな検索の結果を ハッシュリファレンスで返す based AnyEvent
package AnyEvent::Search::Scrape::Toranoana;
use strict;
use warnings;
use Carp;
use utf8;
use Encode;
use AnyEvent;
use AnyEvent::HTTP;
use URI::Escape;

our $VERSION = '0.01';

# Encoding object for Shift_JIS: used to encode outgoing query values and to
# decode the response body.
my $enc_sjis = find_encoding('Shift_JIS');

# Site root; also prefixed to the relative links extracted by _get_list().
my $home = 'http://www.toranoana.jp';

# Search CGI that every request is sent to.
my $search_uri = "${home}/cgi-bin/R2/d_search.cgi";

# Default query parameters for the search CGI. new() combines them with the
# mode/keyword pair and with any same-named arguments supplied by the caller.
# NOTE(review): the meaning of the individual values is defined by the remote
# site and cannot be confirmed from this file.
my %params = (
    item_kind => '0401',
    bl_flg    => '0',
    adl       => '0',
    obj       => '0',
    stk       => '1',
    img       => '1',
    ps        => '1', # next => 31 (presumably the start offset of the following page -- TODO confirm)
);
# Constructor. Builds the search URL and immediately starts an asynchronous
# HTTP GET for it.
#
# Named arguments:
#   mode        - required; one of mak, nam, gnr, com, mch, act. It is used as
#                 the name of the query parameter that carries the keyword.
#   q           - required; the search keyword (sent Shift_JIS-encoded).
#   callback    - required; invoked with the array reference produced by
#                 _get_list() once a non-empty body has been received.
#   on_error    - optional; invoked with a message string on failure.
#                 Defaults to a sub that dies with that message.
#   headers     - optional; passed through as http_get's "headers" option.
#   max_retries - optional; how many times a successful but empty response is
#                 requested again before on_error is called (default 3).
#   Any key of %params (item_kind, ps, ...) may also be passed to override
#   the default value of that query parameter.
#
# Croaks when a required argument is missing or when mode is not one of the
# known values. Returns the blessed object; destroying it disables retries.
sub new {
    my ($class, %args) = @_;

    for my $name (qw(mode q callback)) {
        $args{$name}
            or Carp::croak qq(! failed "$name" parameter not found.);
    }
    # Anchored so that only the exact mode names are accepted.
    Carp::croak qq(! faild "mode" is invalid value.)
        unless $args{mode} =~ m/\A(?:mak|nam|gnr|com|mch|act)\z/;

    # Work on a per-call copy: writing the mode/keyword pair into the shared
    # %params would leak it into every object created afterwards.
    my %query = (%params, $args{mode} => $args{q});
    for my $key (keys %query) {
        # defined() rather than truthiness, so an override of '0' is honoured.
        $query{$key} = $args{$key} if defined $args{$key};
    }

    # Joining complete key=value pairs avoids a stray leading '&'; sorting
    # makes the generated URL deterministic.
    my $query_string = join '&', map {
        my $value = $query{$_};
        join '=', $_, uri_escape($enc_sjis->encode($value));
    } sort keys %query;
    my $uri = "${search_uri}?${query_string}";

    my $on_error     = $args{on_error} || sub { die @_ };
    my $retries_left = defined $args{max_retries} ? $args{max_retries} : 3;

    my $self = bless {}, $class;

    my $fetch;
    $fetch = sub {
        # Set when on_header has already told the caller about a bad status,
        # so the completion callback does not report the same failure twice.
        my $reported = 0;

        http_get $uri, headers => $args{headers}, on_header => sub {
            my $hdrs = shift;
            return 1 if $hdrs->{Status} eq '200';
            $reported = 1;
            $on_error->("$uri: $hdrs->{Status} $hdrs->{Reason}");
            return;    # false: abort the download
        }, sub {
            my ($body, $hdrs) = @_;

            # Aborted by on_header above: the body is undef and the error has
            # been delivered. Retrying here would loop forever.
            return if $reported;

            # Errors for which on_header never ran (e.g. connection failure).
            if ($hdrs->{Status} ne '200') {
                $on_error->("$uri: $hdrs->{Status} $hdrs->{Reason}");
                return;
            }

            unless (defined $body && length $body) {
                # $fetch is undef once the object has been destroyed.
                if ($fetch && $retries_left-- > 0) {
                    # Called as a plain statement (void context) so http_get
                    # does not hand back a cancellation guard that would be
                    # dropped immediately.
                    $fetch->();
                    return;
                }
                $on_error->("$uri: empty response body");
                return;
            }

            $args{callback}->(_get_list($enc_sjis->decode($body)));
            return;
        };
        return;
    };

    # $fetch refers to itself; clearing it when the object goes away breaks
    # that reference cycle.
    $self->{guard} = AnyEvent::Util::guard { undef $fetch; };

    $fetch->();
    return $self;
}
# Extracts the search results from a decoded result page.
#
# Takes the page HTML as a string and returns a reference to an array with one
# hash reference per matched item, with the keys circle, title, urlOfTitle,
# urlOfCircle and urlOfThumbnail. The array is empty when nothing matches.
sub _get_list {
    my ($html) = @_;

    # The pattern is assembled from single-quoted fragments so that it stays
    # readable; the concatenation contains no newlines.
    my $item_re = join '',
        '(/mailorder/article/[^"]+)"><img src="([^"]+)".*?',
        'title="([^"]+)".*?',
        '<span class="txt-9pt">.+?<a href="\s([^"]+)">(.*?)</a>';

    my @items;
    while ($html =~ m/$item_re/gs) {
        # Copy the captures before the substitution below resets them.
        my ($title_path, $thumbnail_url, $title, $circle_path, $circle_name)
            = ($1, $2, $3, $4, $5);

        # Drop the first <img ...> tag embedded in the circle link text.
        $circle_name =~ s/<img([^>]+?)>//;

        my %item = (
            circle         => $circle_name,
            title          => $title,
            urlOfTitle     => "${home}${title_path}",
            urlOfCircle    => "${home}${circle_path}",
            urlOfThumbnail => $thumbnail_url,
        );
        push @items, \%item;
    }

    return \@items;
}
1;
__END__
=head1 NAME

AnyEvent::Search::Scrape::Toranoana - Toranoana Search interface for AnyEvent-based programs

=head1 SYNOPSIS

    use utf8;
    use AnyEvent::Search::Scrape::Toranoana;
    use JSON;

    my $cv = AnyEvent->condvar;
    my $client = AnyEvent::Search::Scrape::Toranoana->new(
        mode => 'mak',
        q => '絶対少女',
        callback => sub {
            my $response = shift;
            print encode_json $response;
            $cv->send();
        },
    );
    $cv->recv();

=head1 DESCRIPTION

AnyEvent::Search::Scrape::Toranoana provides a simple interface for AnyEvent-based programs.
It gets the top search results from www.toranoana.jp and passes them to the callback as a reference to an array of hash references.

=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment