Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Created September 24, 2011 16:01
Show Gist options
  • Save ishiduca/1239483 to your computer and use it in GitHub Desktop.
Save ishiduca/1239483 to your computer and use it in GitHub Desktop.
メロンブックス検索の結果を ハッシュリファレンスで返す
package WWW::Search::Scrape::Melon;
use strict;
use Carp;
use utf8;
use Encode;
use LWP::UserAgent;
use URI::Escape;
use Web::Scraper;
use Data::Dumper;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
require Exporter;
our $VERSION = '0.01';
our @ISA = qw(Exporter);
our @EXPORT = qw(search);
our @EXPORT_OK = qw();
my $home = 'http://shop.melonbooks.co.jp';
my $check_age = "${home}/shop/check_age.php";
my $index_php = "${home}/shop/index.php";
my $pass_checked = "${home}/shop/top/main";
my $search = "${home}/shop/list/";
sub search {
my %params = @_;
unless ($params{keyword}) {
Carp::carp qq(! failed: "keyword" parameter not found.);
return undef;
}
my $ua = LWP::UserAgent->new( cookie_jar => {} );
push @{ $ua->requests_redirectable }, 'POST';
my $res = $ua->get($check_age);
unless ($res->is_success) {
Carp::carp $res->status_line;
return undef;
}
if ($params{'_test'}) {
Carp::carp Dumper $res;
Carp::carp qq(-----------------------------------\n);
}
$res = $ua->post($index_php,
'Referer' => $check_age,
content => { 'LIVRET' => 'off', 'RATED' => '18' },
);
unless ($res->is_success) {
Carp::carp $res->status_line;
return undef;
}
if ($params{'_test'}) {
Carp::carp Dumper $res;
Carp::carp qq(-----------------------------------\n);
}
my $query = uri_escape_utf8 $params{keyword};
$res = $ua->get("${search}?DA=de&F=${query}&ST=0&SC=0&G=&E=ON&CR[]=18&CR[]=15&CR[]=0&O=maker&P=30&DS=desc",
'Referer' => $pass_checked,
);
unless ($res->is_success) {
Carp::carp $res->status_line;
return undef;
}
if ($params{'_test'}) {
Carp::carp Dumper $res;
Carp::carp qq(-----------------------------------\n);
}
_get_list($res->decoded_content);
}
sub _get_list {
my $html = shift;
my $scraper = scraper {
process '/html/body/table/tbody/tr[3]/td[2]/table/tbody/tr[2]/td/div/table/tr', 'lists[]' => scraper {
process '//td[1]/table[@class="list_desc_innertable_img"]/tr/td/div/a/img', 'urlOfThumbnail' => [ '@src', sub { $_->as_string } ];
process '//td[2]/table/tr[1]/td/font', 'title' => 'TEXT';
process '//td[2]/table/tr[2]/td/font/a', 'circle' => 'TEXT';
process '//td[2]/table/tr[2]/td/font/a', 'urlOfCircle' => [ '@href', sub { $_->as_string } ];
};
};
my $results = $scraper->scrape($html, 'http://shop.melonbooks.co.jp');
return [ grep{ $_->{title} and $_ }@{$results->{lists}} ];
}
1;
__END__
=head1 NAME
WWW::Search::Scrape::Melon
=head1 SYNOPSIS
use WWW::Search::Scrape::Melon;
use utf8;
use JSON;
my $result = WWW::Search::Scrape::Melon::search(
keyword => '紅茶屋'
);
die qq(Dawn...) unless $result;
print encode_json $result;
=head1 DESCRIPTION
WWW::Search::Scrape::Melon provide a simple interface to get top search results from melonbooks.co.jp
and return a list of search results by hash reference.
=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment