Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Created September 20, 2011 15:45
Show Gist options
  • Save ishiduca/1229457 to your computer and use it in GitHub Desktop.
Save ishiduca/1229457 to your computer and use it in GitHub Desktop.
COMIC ZIN検索の結果を ハッシュリファレンスで返す
package WWW::Search::Scrape::Zin;

# Scrapes the COMIC ZIN online shop search page and exposes the results
# through a single exported function, search().

use strict;
use warnings;    # surface odd-length hash assignments, uninitialized values, etc.
use utf8;
#use Encode;
use Carp;
use URI;
use URI::Escape;
use Web::Scraper;

# NOTE(review): "use vars" is redundant with the "our" declarations below;
# it is kept so that nothing previously declared disappears.
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
require Exporter;

our $VERSION   = '0.01';
our @ISA       = qw(Exporter);
our @EXPORT    = qw(search);    # search() is exported by default
our @EXPORT_OK = qw();

# Shop base URL and the product-list endpoint that search() queries.
my $home   = 'http://shop.comiczin.jp';
my $search = "${home}/products/list.php";
# search(keyword => $text)
#
# Runs a keyword search against the shop's product-list page and scrapes
# the result entries.
#
# Named parameters:
#   keyword - required search string. It is escaped with uri_escape_utf8
#             before being placed in the query string. Any defined,
#             non-empty value is accepted (including "0").
#
# Returns:
#   The "list" entry of the scrape result: one hash reference per matched
#   result node, collected into an array reference. Each hash may carry the
#   keys urlOfTitle, urlOfThumbnail, title and circle.
#   Returns undef after a carp() warning when "keyword" is missing or when
#   scraping throws. undef is returned explicitly (not a bare return) so the
#   scalar contract callers already rely on is unchanged.
#   NOTE(review): when no result node matches, "list" is presumably absent
#   and the return value is undef as well - confirm against Web::Scraper.
sub search {
    my %params = @_;

    # Test for "defined and non-empty" rather than truthiness so that a
    # keyword of "0" is not rejected.
    my $keyword = $params{'keyword'};
    unless (defined($keyword) && length($keyword)) {
        Carp::carp qq(parameter "keyword" not found);
        return undef;
    }

    my $query = 'mode=search&name=' . uri_escape_utf8($keyword);
    my $uri   = "${search}?${query}";

    my $scraper = scraper {
        # One entry per result cell inside the search result form.
        process '//form[@id="form1"]/ul/li/div/div[@class="data_area"]', 'list[]' => scraper {
            # as_string is called on the attribute values, so they are
            # expected to be URI objects here.
            process '//a',     'urlOfTitle'     => [ '@href', sub { return $_->as_string; } ];
            process '//a/img', 'urlOfThumbnail' => [ '@src',  sub { return $_->as_string; } ];
            process '//a/img', 'title'          => '@alt';

            # The paragraph text is split on single whitespace characters and
            # the last three fields are dropped; what remains is the circle.
            # NOTE(review): assumes the text always ends with
            # "<author> <price> <edition>" - verify against the live markup.
            process '//p', 'circle' => [ 'TEXT', sub {
                my @details = split /\s/, $_;
                pop @details;    # drop the edition
                pop @details;    # drop the price
                pop @details;    # drop the author
                return join(' ', @details);
            } ];
        };
    };

    # Use the "eval { ...; 1 }" idiom: $@ alone is not a reliable failure
    # signal because it can be reset while the eval block unwinds.
    my $res;
    my $ok = eval {
        $res = $scraper->scrape( URI->new($uri) );
        1;
    };
    unless ($ok) {
        my $error = $@ || 'unknown error';
        Carp::carp "! failed: $error";
        return undef;
    }

    return $res->{list};
}
1;
__END__
=head1 NAME
WWW::Search::Scrape::Zin
=head1 SYNOPSIS
use WWW::Search::Scrape::Zin;
use utf8;
use JSON;
my $result = WWW::Search::Scrape::Zin::search(
keyword => '放課後プレイ'
);
die qq(Dawn...) unless $result;
print encode_json $result;
=head1 DESCRIPTION
WWW::Search::Scrape::Zin provides a simple interface for fetching search results from comiczin.jp. It returns the results as a reference to an array of hash references, one hash per search result.
=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment