Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Created September 6, 2011 13:14
Show Gist options
  • Save ishiduca/1197499 to your computer and use it in GitHub Desktop.
Save ishiduca/1197499 to your computer and use it in GitHub Desktop.
とらのあな検索の結果を ハッシュリファレンスで返す
package WWW::Search::Scrape::Toranoana;
use strict;
use utf8;
use Carp;
use Encode;
use LWP::UserAgent;
use URI;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
require Exporter;
our $VERSION = '0.01';
our @ISA = qw(Exporter);
our @EXPORT = qw(search);
our @EXPORT_OK = qw();
my $enc_sjis = find_encoding('Shift_JIS');
my %default_params = (
item_kind => '0401',
bl_flg => '0',
adl => '0',
obj => '0',
stk => '1',
img => '1',
ps => '1', # next page '31'
);
my $home = 'http://www.toranoana.jp';
my $search_cgi = "${home}/cgi-bin/R2/d_search.cgi";
sub search {
my %params = @_;
unless ($params{'mode'}) {
Carp::carp qq(paramater "mode" not found');
return undef;
}
unless ($params{'q'}) {
Carp::carp qq(paramater "q" not found');
return undef;
}
$params{ $params{'mode'} } = $enc_sjis->encode($params{'q'});
delete $params{'mode'};
delete $params{'q'};
foreach my $key (keys %default_params) {
$params{$key} = $params{$key} || $default_params{$key};
}
my $uri = URI->new( $search_cgi );
$uri->query_form( %params );
my $client = LWP::UserAgent->new;
my $res = $client->get($uri);
unless ($res->is_success) {
Carp::carp $res->status_line;
return undef;
}
get_list($res->decoded_content);
}
sub get_list {
my $html = shift;
(my $reg=<<'REG') =~ tr/\n//d;
(/mailorder/article/[^"]+)"><img src="([^"]+)".*?
title="([^"]+)".*?
<span class="txt-9pt">.+?<a href="\s([^"]+)">(.*?)</a>
REG
;
my $list = [];
while ($html =~ m:$reg:gs) {
my($url_title, $url_thumbnail, $title, $url_circle, $circle)
= ($1, $2, $3, $4, $5);
push @{$list}, {
circle => $circle,
title => $title,
urlOfTitle => "${home}${url_title}",
urlOfCircle => "${home}${url_circle}",
urlOfThumbnail => $url_thumbnail,
};
}
$list;
}
1;
__END__
=head1 NAME
WWW::Search::Scrape::Toranoana
=head1 SYNOPSIS
use WWW::Search::Scrape::Toranoana;
use utf8;
use Encode qw(encode_utf8);
my $result = WWW::Search::Scrape::Toranoana::search(
mode => 'mak',
q => '絶対少女'
);
die qq(Dawn...) unless $result;
for my $item (@{$result}) {
print encode_utf8 $item->{title}, "\n";
}
=head1 DESCRIPTION
WWW::Search::Scrape::Toranoana provide a simple interface to get top search results from www.toranoana.jp and return a list of search results by hash reference.
=cut
@ishiduca
Copy link
Author

ishiduca commented Sep 6, 2011

  • AnyEvent::HTTP 版に直す
  • 正規表現の代わりに HTML::TreeBuilder::XPath を適用させる
  • クラスベースのほうがいいのかしらん?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment