Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Created May 15, 2012 01:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ishiduca/2698475 to your computer and use it in GitHub Desktop.
Save ishiduca/2698475 to your computer and use it in GitHub Desktop.
Meta::Doujin::Search - WebSocket(Web::Hippie) + Dancer + Twiggy
package AnyEvent::WebService::ComicZin;
use strict;
use utf8;
use Encode;
use Carp;
use parent qw(AnyEvent::WebService::ToranoAna);
use URI::Escape;
use Web::Scraper;
# Module version.
our $VERSION = '0.01';
# Site root; also handed to the scraper in _get_list as its second argument.
our $HOME = 'http://shop.comiczin.jp';
# Search endpoint; _create_uri appends the query string to it.
our $SEARCH_URI = "${HOME}/products/list.php";
# Build the search URL for one keyword.
#   %args: keyword => search string (percent-encoded as UTF-8 here)
# Returns the complete URL as a string.
sub _create_uri {
    my ($self, %args) = @_;
    my $escaped_keyword = uri_escape_utf8($args{keyword});
    return $SEARCH_URI . '?mode=search&name=' . $escaped_keyword;
}
# Parse a search-result page into a list of items.
#   $body (positional): raw page bytes; decoded as UTF-8 before scraping.
# Returns an array reference of hashes with the keys urlOfTitle,
# urlOfThumbnail, title and circle (an empty array reference when nothing
# matched). If the scraper throws, a warning is issued via Carp::carp and
# nothing is returned (undef in scalar context, empty list in list context).
sub _get_list {
    my $self = shift;
    my $body = decode_utf8(shift);

    # Attribute values are turned into plain strings via as_string.
    my $as_string = sub { return $_->as_string; };

    my $scraper = scraper {
        process '//form[@id="form1"]/ul/li/div/div[@class="data_area"]', 'list[]' => scraper {
            process '//a', 'urlOfTitle' => [ '@href', $as_string ];
            process '//a/img', 'urlOfThumbnail' => [ '@src', $as_string ];
            process '//a/img', 'title' => '@alt';
            process '//p', 'circle' => [ 'TEXT', sub {
                # Drop the last three whitespace-separated fields of the
                # paragraph text and keep the rest as the circle name.
                my @details = split /\s/, $_;
                pop @details for 1 .. 3;
                return join(' ', @details);
            } ];
        };
    };

    # "eval { ...; 1 }" detects failure even if $@ gets clobbered on the way out.
    my $res;
    my $ok = eval { $res = $scraper->scrape($body, $HOME); 1 };
    if (!$ok) {
        my $error = $@ || 'unknown error';
        Carp::carp qq(! failed: $error);
        return;
    }

    return $res->{list} || [];
}
1;
__END__
package AnyEvent::WebService::MelonBooks;
use strict;
use Carp;
use utf8;
use Encode;
use AnyEvent;
use AnyEvent::HTTP;
use URI::Escape;
use Web::Scraper;
use Data::Dumper;
# Module version.
our $VERSION = '0.01';
# Site root used to build every URL below.
our $HOME = 'http://shop.melonbooks.co.jp';
# Search endpoint; _create_uri appends the query string to it.
our $SEARCH_URI = "${HOME}/shop/list";
# First page requested by new() (GET).
my $check_age = "${HOME}/shop/check_age.php";
# Target of the POST that new() sends after the first page.
my $index = "${HOME}/shop/index.php";
# URL new() expects to have reached after that POST.
my $pass_checked = "${HOME}/shop/top/main";
# Constructor: run the three-request flow (GET check_age page, POST to the
# index page, GET the search page) and hand the parsed result to a callback.
#
# Usage: CLASS->new(%args, $cb)
#   keyword    => search string (required)
#   headers    => hash reference of request headers
#                 (default: { connection => 'keep-alive' }); it is copied,
#                 so the caller's hash is never modified
#   cookie_jar => cookie jar shared by all three requests (default: {})
#   on_error   => code reference called with an error message (default: die)
#   on_header  => code reference passed to every request as on_header; the
#                 default reports any non-200 status through on_error
#   $cb        => code reference (last positional argument) called with the
#                 value returned by _get_list
#
# Croaks when $cb is not a code reference or keyword is missing.
# Returns the new object; the first request is started before it returns.
sub new {
    my $class = shift;
    my $cb    = pop;
    my %args  = @_;

    my $given_headers = delete $args{headers} || { 'connection' => 'keep-alive' };
    # Private copy: referer/content-type are added and removed below and
    # must not leak into a hash the caller still owns.
    my $headers          = { %{$given_headers} };
    my $on_error         = delete $args{on_error} || sub { die @_ };
    my $custom_on_header = delete $args{on_header};

    Carp::croak 'argument "cb" must be "CODE"' unless ref $cb eq 'CODE';
    Carp::croak 'argument "keyword" not found' unless $args{keyword};

    # Report a failed step, naming the URL that was actually requested.
    my $fail = sub {
        my ($what, $url, $hdr) = @_;
        $on_error->(qq(! $what failed: "${url}" $hdr->{Status} $hdr->{Reason}));
        return;
    };

    # on_header callback for one step: the caller's own, or a status check
    # that returns false for anything but 200.
    my $on_header_for = sub {
        my ($what, $url) = @_;
        return $custom_on_header || sub {
            my $hdr = shift;
            return 1 if $hdr->{Status} eq '200';
            $fail->($what, $url, $hdr);
            return;
        };
    };

    my $self = bless {}, $class;
    $self->{cookie_jar} = $args{cookie_jar} || {};

    # Every http_request below is followed by a bare "return;" so that it is
    # always called in void context and its return value is never used.
    my $guard; $guard = sub {
        # Step 1: fetch the age-check page.
        http_request GET => $check_age,
            headers    => $headers,
            cookie_jar => $self->{cookie_jar},
            on_header  => $on_header_for->('login', $check_age),
            sub {
                my (undef, $hdr) = @_;
                if ($hdr->{URL} ne $check_age) {
                    $fail->('login', $check_age, $hdr);
                    return;
                }

                # Step 2: submit the form.
                $headers->{referer}        = $check_age;
                $headers->{'content-type'} = 'application/x-www-form-urlencoded';
                http_request POST => $index,
                    body       => "LIVRET=off&RATED=18",
                    headers    => $headers,
                    cookie_jar => $self->{cookie_jar},
                    on_header  => $on_header_for->('login', $index),
                    sub {
                        my (undef, $hdr) = @_;
                        if ($hdr->{URL} ne $pass_checked) {
                            $fail->('login', $index, $hdr);
                            return;
                        }

                        # Step 3: the actual search.
                        $headers->{referer} = $pass_checked;
                        delete $headers->{'content-type'};
                        my $uri = $self->_create_uri(%args);
                        http_request GET => $uri,
                            headers    => $headers,
                            cookie_jar => $self->{cookie_jar},
                            on_header  => $on_header_for->('search', $uri),
                            sub {
                                my ($body) = @_;
                                $cb->($self->_get_list($body));
                            };
                        return;
                    };
                return;
            };
        return;
    };
    $self->{guard} = AnyEvent::Util::guard { undef $guard; };
    $guard->();
    return $self;
}
# Build the search URL for one keyword.
#   %args: keyword => search string (percent-encoded as UTF-8 here)
# All other query parameters are fixed. Returns the URL as a string.
sub _create_uri {
    my ($self, %args) = @_;
    my $escaped_keyword = uri_escape_utf8($args{keyword});
    return join '',
        $SEARCH_URI,
        '?DA=de&F=',
        $escaped_keyword,
        '&ST=0&SC=0&G=&E=ON&CR[]=18&CR[]=15&CR[]=0&O=maker&P=30&DS=desc';
}
# Parse a search-result page into a list of items.
#   $body (positional): raw page bytes; decoded as UTF-8 before scraping.
# Returns an array reference of hashes (keys: urlOfThumbnail, title, circle,
# urlOfCircle) containing only the rows that have a title; the array is
# empty when no row matched.
sub _get_list {
    my $self = shift;
    my $body = decode_utf8(shift);

    my $as_string = sub { $_->as_string };

    my $scraper = scraper {
        process '/html/body/table/tbody/tr[3]/td[2]/table/tbody/tr[2]/td/div/table/tr', 'lists[]' => scraper {
            process '//td[1]/table[@class="list_desc_innertable_img"]/tr/td/div/a/img', 'urlOfThumbnail' => [ '@src', $as_string ];
            process '//td[2]/table/tr[1]/td/font', 'title' => 'TEXT';
            process '//td[2]/table/tr[2]/td/font/a', 'circle' => 'TEXT';
            process '//td[2]/table/tr[2]/td/font/a', 'urlOfCircle' => [ '@href', $as_string ];
        };
    };

    my $results = $scraper->scrape($body, $HOME);

    # 'lists' can be missing when nothing matched; fall back to an empty
    # list instead of dereferencing undef.
    my $rows = $results->{lists} || [];
    return [ grep { $_->{title} } @{$rows} ];
}
1;
__END__
package AnyEvent::WebService::ToranoAna;
use strict;
use Carp;
use utf8;
use Encode;
use AnyEvent;
use AnyEvent::HTTP;
use URI::Escape;
# Module version.
our $VERSION = '0.01';
# Site root; _get_list prefixes it to the site-relative links it extracts.
our $HOME = 'http://www.toranoana.jp';
# Search CGI; _create_uri appends the query string to it.
our $SEARCH_URI = "${HOME}/cgi-bin/R2/d_search.cgi";
# Encoding object used to encode query values and to decode response bodies.
my $enc_sjis = find_encoding('cp932'); # 'Shift_JIS'
# Default query parameters; _create_uri lets same-named arguments override them.
# NOTE(review): the meaning of the individual keys/values is defined by the
# site's search form and is not visible here - confirm before changing them.
my %params = (
item_kind => '0401',
bl_flg => '0',
adl => '0',
obj => '0',
stk => '1',
img => '1',
ps => '1', # next => 31,
);
# Build the search URL.
#   %args:
#     keyword => search string; encoded to cp932 and percent-escaped
#     mode    => name of the query key that carries the keyword
#                (default: 'mak')
#     any key of %params => overrides that default; an explicit false value
#                such as 0 is honoured, only undef falls back to the default
# Parameters are emitted in sorted key order so the same arguments always
# produce the same URL. Returns the URL as a string.
sub _create_uri {
    my ($self, %args) = @_;

    my $mode = $args{mode} || 'mak';

    # The keyword must be percent-escaped like every other value: its cp932
    # bytes are not valid in a URL as-is.
    my @pairs = ( join '=', $mode, uri_escape($enc_sjis->encode($args{keyword})) );

    for my $key (sort keys %params) {
        my $value = defined $args{$key} ? $args{$key} : $params{$key};
        push @pairs, join '=', $key, uri_escape($enc_sjis->encode($value));
    }

    return join '?', $SEARCH_URI, join('&', @pairs);
}
# _get_list
# Extract the result items from a search-result page with a regular
# expression.
#   $body (positional): raw response bytes, decoded with $enc_sjis.
# Returns { error => '"body" not found' } when $body is false; otherwise an
# array reference of hashes with the keys circle, title, urlOfTitle,
# urlOfCircle and urlOfThumbnail (urlOfTitle/urlOfCircle prefixed with $HOME).
sub _get_list {
    my ($self, $body) = @_;
    return { error => '"body" not found' } unless $body;

    $body = $enc_sjis->decode($body);

    # The three fragments are concatenated without separators into one pattern.
    my $pattern = join '',
        '(/mailorder/article/[^"]+)"><img src="([^"]+)".*?',
        'title="([^"]+).*?',
        '<span class="txt-9pt">.+?<a href="\s([^"]+)">(.*?)</a>';
    my $item_re = qr/$pattern/s;

    my @items;
    while ($body =~ /$item_re/g) {
        # Copy the captures first: the substitution below resets $1..$5.
        my ($title_path, $thumbnail, $title, $circle_path, $circle_html)
            = ($1, $2, $3, $4, $5);

        # Strip the first <img ...> tag from the circle text.
        $circle_html =~ s/<img([^>]+?)>//;

        push @items, {
            circle         => $circle_html,
            title          => $title,
            urlOfTitle     => $HOME . $title_path,
            urlOfCircle    => $HOME . $circle_path,
            urlOfThumbnail => $thumbnail,
        };
    }

    return \@items;
}
# Constructor: start one search request and hand the parsed result to a
# callback.
#
# Usage: CLASS->new(%args, $cb)
#   keyword  => search string (required)
#   headers  => hash reference of request headers
#               (default: { connection => 'keep-alive' })
#   on_error => code reference called with an error message when the
#               response status is not 200 (default: die)
#   $cb      => code reference (last positional argument) called with the
#               value returned by _get_list
#   remaining arguments are passed on to _create_uri
#
# Croaks when $cb is not a code reference or keyword is missing.
# Returns the new object; the request is started before it returns.
sub new {
    my $class = shift;
    my $cb    = pop;
    my %args  = @_;

    my $headers  = delete $args{headers}  || { 'connection' => 'keep-alive' };
    my $on_error = delete $args{on_error} || sub { die @_ };

    Carp::croak 'argument "cb" must be "CODE"' unless ref $cb eq 'CODE';
    Carp::croak 'argument "keyword" not found' unless $args{keyword};

    my $self = bless {}, $class;
    my $uri  = $self->_create_uri(%args);

    # Returning false from on_header tells http_request not to continue.
    my $check_status = sub {
        my $hdr = shift;
        return 1 if $hdr->{Status} eq '200';
        $on_error->(": $hdr->{Status} $hdr->{Reason}");
        return;
    };

    my $guard; $guard = sub {
        http_request GET => $uri,
            headers   => $headers,
            on_header => $check_status,
            sub {
                my ($body) = @_;
                $cb->($self->_get_list($body));
            };
    };
    $self->{guard} = AnyEvent::Util::guard { undef $guard; };
    $guard->();
    return $self;
}
1;
__END__
=head1 NAME
AnyEvent::WebService::ToranoAna - Toranoana Search interface for AnyEvent-based programs
=head1 SYNOPSIS
use utf8;
use AnyEvent::WebService::ToranoAna;
use JSON;
my $cv = AE::cv;
my $client = AnyEvent::WebService::ToranoAna->new(
mode => 'mak',
keyword => '絶対少女',
sub {
my $response = shift;
print encode_json $response;
$cv->send();
},
);
$cv->recv();
=head1 DESCRIPTION
AnyEvent::WebService::ToranoAna provides a simple interface for AnyEvent-based programs.
It gets the top search results from www.toranoana.jp and returns them as a list of hash references.
=cut
package AnyEvent::WebService::ComicZin;
use strict;
use utf8;
use Encode;
use Carp;
use parent qw(AnyEvent::WebService::ToranoAna);
use URI::Escape;
use Web::Scraper;
# Module version.
our $VERSION = '0.01';
# Site root; also handed to the scraper in _get_list as its second argument.
our $HOME = 'http://shop.comiczin.jp';
# Search endpoint; _create_uri appends the query string to it.
our $SEARCH_URI = "${HOME}/products/list.php";
# Build the search URL for one keyword.
#   %args: keyword => search string (percent-encoded as UTF-8 here)
# Returns the complete URL as a string.
sub _create_uri {
    my ($self, %args) = @_;
    my $escaped_keyword = uri_escape_utf8($args{keyword});
    return $SEARCH_URI . '?mode=search&name=' . $escaped_keyword;
}
# Parse a search-result page into a list of items.
#   $body (positional): raw page bytes; decoded as UTF-8 before scraping.
# Returns an array reference of hashes with the keys urlOfTitle,
# urlOfThumbnail, title and circle (an empty array reference when nothing
# matched). If the scraper throws, a warning is issued via Carp::carp and
# nothing is returned (undef in scalar context, empty list in list context).
sub _get_list {
    my $self = shift;
    my $body = decode_utf8(shift);

    # Attribute values are turned into plain strings via as_string.
    my $as_string = sub { return $_->as_string; };

    my $scraper = scraper {
        process '//form[@id="form1"]/ul/li/div/div[@class="data_area"]', 'list[]' => scraper {
            process '//a', 'urlOfTitle' => [ '@href', $as_string ];
            process '//a/img', 'urlOfThumbnail' => [ '@src', $as_string ];
            process '//a/img', 'title' => '@alt';
            process '//p', 'circle' => [ 'TEXT', sub {
                # Drop the last three whitespace-separated fields of the
                # paragraph text and keep the rest as the circle name.
                my @details = split /\s/, $_;
                pop @details for 1 .. 3;
                return join(' ', @details);
            } ];
        };
    };

    # "eval { ...; 1 }" detects failure even if $@ gets clobbered on the way out.
    my $res;
    my $ok = eval { $res = $scraper->scrape($body, $HOME); 1 };
    if (!$ok) {
        my $error = $@ || 'unknown error';
        Carp::carp qq(! failed: $error);
        return;
    }

    return $res->{list} || [];
}
1;
__END__
package AnyEvent::WebService::MelonBooks;
use strict;
use Carp;
use utf8;
use Encode;
use AnyEvent;
use AnyEvent::HTTP;
use URI::Escape;
use Web::Scraper;
use Data::Dumper;
# Module version.
our $VERSION = '0.01';
# Site root used to build every URL below.
our $HOME = 'http://shop.melonbooks.co.jp';
# Search endpoint; _create_uri appends the query string to it.
our $SEARCH_URI = "${HOME}/shop/list";
# First page requested by new() (GET).
my $check_age = "${HOME}/shop/check_age.php";
# Target of the POST that new() sends after the first page.
my $index = "${HOME}/shop/index.php";
# URL new() expects to have reached after that POST.
my $pass_checked = "${HOME}/shop/top/main";
# Constructor: run the three-request flow (GET check_age page, POST to the
# index page, GET the search page) and hand the parsed result to a callback.
#
# Usage: CLASS->new(%args, $cb)
#   keyword    => search string (required)
#   headers    => hash reference of request headers
#                 (default: { connection => 'keep-alive' }); it is copied,
#                 so the caller's hash is never modified
#   cookie_jar => cookie jar shared by all three requests (default: {})
#   on_error   => code reference called with an error message (default: die)
#   on_header  => code reference passed to every request as on_header; the
#                 default reports any non-200 status through on_error
#   $cb        => code reference (last positional argument) called with the
#                 value returned by _get_list
#
# Croaks when $cb is not a code reference or keyword is missing.
# Returns the new object; the first request is started before it returns.
sub new {
    my $class = shift;
    my $cb    = pop;
    my %args  = @_;

    my $given_headers = delete $args{headers} || { 'connection' => 'keep-alive' };
    # Private copy: referer/content-type are added and removed below and
    # must not leak into a hash the caller still owns.
    my $headers          = { %{$given_headers} };
    my $on_error         = delete $args{on_error} || sub { die @_ };
    my $custom_on_header = delete $args{on_header};

    Carp::croak 'argument "cb" must be "CODE"' unless ref $cb eq 'CODE';
    Carp::croak 'argument "keyword" not found' unless $args{keyword};

    # Report a failed step, naming the URL that was actually requested.
    my $fail = sub {
        my ($what, $url, $hdr) = @_;
        $on_error->(qq(! $what failed: "${url}" $hdr->{Status} $hdr->{Reason}));
        return;
    };

    # on_header callback for one step: the caller's own, or a status check
    # that returns false for anything but 200.
    my $on_header_for = sub {
        my ($what, $url) = @_;
        return $custom_on_header || sub {
            my $hdr = shift;
            return 1 if $hdr->{Status} eq '200';
            $fail->($what, $url, $hdr);
            return;
        };
    };

    my $self = bless {}, $class;
    $self->{cookie_jar} = $args{cookie_jar} || {};

    # Every http_request below is followed by a bare "return;" so that it is
    # always called in void context and its return value is never used.
    my $guard; $guard = sub {
        # Step 1: fetch the age-check page.
        http_request GET => $check_age,
            headers    => $headers,
            cookie_jar => $self->{cookie_jar},
            on_header  => $on_header_for->('login', $check_age),
            sub {
                my (undef, $hdr) = @_;
                if ($hdr->{URL} ne $check_age) {
                    $fail->('login', $check_age, $hdr);
                    return;
                }

                # Step 2: submit the form.
                $headers->{referer}        = $check_age;
                $headers->{'content-type'} = 'application/x-www-form-urlencoded';
                http_request POST => $index,
                    body       => "LIVRET=off&RATED=18",
                    headers    => $headers,
                    cookie_jar => $self->{cookie_jar},
                    on_header  => $on_header_for->('login', $index),
                    sub {
                        my (undef, $hdr) = @_;
                        if ($hdr->{URL} ne $pass_checked) {
                            $fail->('login', $index, $hdr);
                            return;
                        }

                        # Step 3: the actual search.
                        $headers->{referer} = $pass_checked;
                        delete $headers->{'content-type'};
                        my $uri = $self->_create_uri(%args);
                        http_request GET => $uri,
                            headers    => $headers,
                            cookie_jar => $self->{cookie_jar},
                            on_header  => $on_header_for->('search', $uri),
                            sub {
                                my ($body) = @_;
                                $cb->($self->_get_list($body));
                            };
                        return;
                    };
                return;
            };
        return;
    };
    $self->{guard} = AnyEvent::Util::guard { undef $guard; };
    $guard->();
    return $self;
}
# Build the search URL for one keyword.
#   %args: keyword => search string (percent-encoded as UTF-8 here)
# All other query parameters are fixed. Returns the URL as a string.
sub _create_uri {
    my ($self, %args) = @_;
    my $escaped_keyword = uri_escape_utf8($args{keyword});
    return join '',
        $SEARCH_URI,
        '?DA=de&F=',
        $escaped_keyword,
        '&ST=0&SC=0&G=&E=ON&CR[]=18&CR[]=15&CR[]=0&O=maker&P=30&DS=desc';
}
# Parse a search-result page into a list of items.
#   $body (positional): raw page bytes; decoded as UTF-8 before scraping.
# Returns an array reference of hashes (keys: urlOfThumbnail, title, circle,
# urlOfCircle) containing only the rows that have a title; the array is
# empty when no row matched.
sub _get_list {
    my $self = shift;
    my $body = decode_utf8(shift);

    my $as_string = sub { $_->as_string };

    my $scraper = scraper {
        process '/html/body/table/tbody/tr[3]/td[2]/table/tbody/tr[2]/td/div/table/tr', 'lists[]' => scraper {
            process '//td[1]/table[@class="list_desc_innertable_img"]/tr/td/div/a/img', 'urlOfThumbnail' => [ '@src', $as_string ];
            process '//td[2]/table/tr[1]/td/font', 'title' => 'TEXT';
            process '//td[2]/table/tr[2]/td/font/a', 'circle' => 'TEXT';
            process '//td[2]/table/tr[2]/td/font/a', 'urlOfCircle' => [ '@href', $as_string ];
        };
    };

    my $results = $scraper->scrape($body, $HOME);

    # 'lists' can be missing when nothing matched; fall back to an empty
    # list instead of dereferencing undef.
    my $rows = $results->{lists} || [];
    return [ grep { $_->{title} } @{$rows} ];
}
1;
__END__
package AnyEvent::WebService::ToranoAna;
use strict;
use Carp;
use utf8;
use Encode;
use AnyEvent;
use AnyEvent::HTTP;
use URI::Escape;
# Module version.
our $VERSION = '0.02';
# Site root; _get_list prefixes it to the site-relative links it extracts.
our $HOME = 'http://www.toranoana.jp';
# Search CGI; _create_uri appends the query string to it.
our $SEARCH_URI = "${HOME}/cgi-bin/R2/d_search.cgi";
# Encoding object used to encode query values and to decode response bodies.
my $enc_sjis = find_encoding('cp932'); # 'Shift_JIS'
# Default query parameters; _create_uri lets same-named arguments override them.
# NOTE(review): the meaning of the individual keys/values is defined by the
# site's search form and is not visible here - confirm before changing them.
my %params = (
item_kind => '0401',
bl_flg => '0',
adl => '0',
obj => '0',
stk => '1',
img => '1',
ps => '1', # next => 31,
);
# Build the search URL.
#   %args:
#     keyword => search string; encoded to cp932 and percent-escaped
#     mode    => name of the query key that carries the keyword
#                (default: 'mak')
#     any key of %params => overrides that default; an explicit false value
#                such as 0 is honoured, only undef falls back to the default
# Parameters are emitted in sorted key order so the same arguments always
# produce the same URL. Returns the URL as a string; nothing is printed
# (the former debug "warn" of the URL has been removed).
sub _create_uri {
    my ($self, %args) = @_;

    my $mode  = $args{mode} || 'mak';
    my @pairs = ( join '=', $mode, uri_escape($enc_sjis->encode($args{keyword})) );

    for my $key (sort keys %params) {
        my $value = defined $args{$key} ? $args{$key} : $params{$key};
        push @pairs, join '=', $key, uri_escape($enc_sjis->encode($value));
    }

    return join '?', $SEARCH_URI, join('&', @pairs);
}
# _get_list
# Extract the result items from a search-result page with a regular
# expression.
#   $body (positional): raw response bytes, decoded with $enc_sjis.
# Returns { error => '"body" not found' } when $body is false; otherwise an
# array reference of hashes with the keys circle, title, urlOfTitle,
# urlOfCircle and urlOfThumbnail (urlOfTitle/urlOfCircle prefixed with $HOME).
sub _get_list {
    my ($self, $body) = @_;
    return { error => '"body" not found' } unless $body;

    $body = $enc_sjis->decode($body);

    # The three fragments are concatenated without separators into one pattern.
    my $pattern = join '',
        '(/mailorder/article/[^"]+)"><img src="([^"]+)".*?',
        'title="([^"]+).*?',
        '<span class="txt-9pt">.+?<a href="\s([^"]+)">(.*?)</a>';
    my $item_re = qr/$pattern/s;

    my @items;
    while ($body =~ /$item_re/g) {
        # Copy the captures first: the substitution below resets $1..$5.
        my ($title_path, $thumbnail, $title, $circle_path, $circle_html)
            = ($1, $2, $3, $4, $5);

        # Strip the first <img ...> tag from the circle text.
        $circle_html =~ s/<img([^>]+?)>//;

        push @items, {
            circle         => $circle_html,
            title          => $title,
            urlOfTitle     => $HOME . $title_path,
            urlOfCircle    => $HOME . $circle_path,
            urlOfThumbnail => $thumbnail,
        };
    }

    return \@items;
}
# Constructor: start one search request and hand the parsed result to a
# callback.
#
# Usage: CLASS->new(%args, $cb)
#   keyword  => search string (required)
#   headers  => hash reference of request headers
#               (default: { connection => 'keep-alive' })
#   on_error => code reference called with an error message when the
#               response status is not 200 (default: die)
#   $cb      => code reference (last positional argument) called with the
#               value returned by _get_list
#   remaining arguments are passed on to _create_uri
#
# Croaks when $cb is not a code reference or keyword is missing.
# Returns the new object; the request is started before it returns.
sub new {
    my $class = shift;
    my $cb    = pop;
    my %args  = @_;

    my $headers  = delete $args{headers}  || { 'connection' => 'keep-alive' };
    my $on_error = delete $args{on_error} || sub { die @_ };

    Carp::croak 'argument "cb" must be "CODE"' unless ref $cb eq 'CODE';
    Carp::croak 'argument "keyword" not found' unless $args{keyword};

    my $self = bless {}, $class;
    my $uri  = $self->_create_uri(%args);

    # Returning false from on_header tells http_request not to continue.
    my $check_status = sub {
        my $hdr = shift;
        return 1 if $hdr->{Status} eq '200';
        $on_error->(": $hdr->{Status} $hdr->{Reason}");
        return;
    };

    my $guard; $guard = sub {
        http_request GET => $uri,
            headers   => $headers,
            on_header => $check_status,
            sub {
                my ($body) = @_;
                $cb->($self->_get_list($body));
            };
    };
    $self->{guard} = AnyEvent::Util::guard { undef $guard; };
    $guard->();
    return $self;
}
1;
__END__
=head1 NAME
AnyEvent::WebService::ToranoAna - Toranoana Search interface for AnyEvent-based programs
=head1 SYNOPSIS
use utf8;
use AnyEvent::WebService::ToranoAna;
use JSON;
my $cv = AE::cv;
my $client = AnyEvent::WebService::ToranoAna->new(
mode => 'mak',
keyword => '絶対少女',
sub {
my $response = shift;
print encode_json $response;
$cv->send();
},
);
$cv->recv();
=head1 DESCRIPTION
AnyEvent::WebService::ToranoAna provides a simple interface for AnyEvent-based programs.
It gets the top search results from www.toranoana.jp and returns them as a list of hash references.
=cut
.
|-- app.psgi
|-- lib
|   `-- AnyEvent
|       `-- WebService
|           |-- ComicZin.pm
|           |-- MelonBooks.pm
|           `-- ToranoAna.pm
|-- public
|   |-- css
|   |   `-- index.css
|   `-- js
|       `-- index.js
`-- views
    `-- index.tt
package Service;
use strict;
use utf8;
use lib 'lib';
use AnyEvent::WebService::ToranoAna;
use AnyEvent::WebService::MelonBooks;
use AnyEvent::WebService::ComicZin;
use Dancer qw(:syntax);
#use JSON;
# Web::Hippie route: fan one incoming search message out to every shop.
# Reads the message and the connection handle from the PSGI environment
# ('hippie.message' / 'hippie.handle'), starts one asynchronous search per
# shop and sends each result back over the handle as
# { shop => NAME, list => RESULT }.
get '/message' => sub {
    my $message = request->env->{'hippie.message'};
    my $handle  = request->env->{'hippie.handle'};
    for my $shop (qw/ToranoAna MelonBooks ComicZin/) {
        my $class = "AnyEvent::WebService::$shop";
        my $client;
        $client = $class->new(
            keyword  => $message->{keyword},
            mode     => $message->{mode},
            # Without this the clients' default on_error handler dies inside
            # an event-loop callback. Log the failure instead and release the
            # client, since the result callback may never run after an error.
            on_error => sub {
                warn "[$shop] @_\n";
                undef $client;
            },
            sub {
                my ($response) = @_;
                $handle->send_msg({ shop => $shop, list => $response });
                # Drop the reference that kept the client alive during the request.
                undef $client;
            },
        );
    }
};
# app.psgi entry point: configures Dancer, defines the index page and
# assembles the PSGI application with Plack::Builder.
package main;
use strict;
use Dancer ;
use Plack::Builder;
# Dancer settings: response charset and Template Toolkit as template engine.
set 'charset' => 'UTF-8';
set 'template' => 'template_toolkit';
set 'engines' => {
'template_toolkit' => {
'ENCODING' => 'utf8'
}
};
# Index page: renders the 'index' template with the page title and the list
# of values offered in the search-mode <select>.
get '/' => sub {
template 'index' => {
title => 'Meta::Doujin::Search',
options => [ qw/mak nam gnr com mch act/ ]
};
};
# Mount the application at '/' and again under '/_hippie', where requests
# pass through the Web::Hippie middleware first.
# NOTE(review): presumably both `dance` and `Service::dance` yield the PSGI
# app for the shared Dancer routes - confirm against the Dancer version used.
builder {
mount '/' => dance;
mount '/_hippie' => builder {
enable "+Web::Hippie";
Service::dance;
};
};
/* Base font for the whole page. */
html,
html > body,
body {
    font-family: "Gill Sans", Helvetica, Arial, sans-serif;
}

/* Top margin of the page wrapper; the form and status bars are fixed at the top. */
#main {
    margin-top: 48px;
}

/* One result tile (both spellings of the class name are supported). */
.inline_block,
.inline-block {
    display: inline-block;
    width: 240px;
    height: 240px;
    margin: 6px;
    border: 1px solid #33aaff;
    text-align: center;
}

/* Thumbnail inside a tile. */
.item_img,
.item-img {
    text-align: center;
}

/* Result container. */
section {
    display: block;
    text-align: center;
    padding-bottom: 48px;
}

/* Search form, fixed at the very top of the viewport. */
#f {
    position: fixed;
    top: 0;
    left: 0;
    z-index: 10;
    padding: 0 3px 3px 3px;
    height: 23px;
    width: 100%;
    background-color: #ffffff;
}

/* Keyword input. */
#keyword {
    border: 0;
    font-size: 11pt;
    width: 40em;
    border-left: 6px solid #999999;
}

/* Connection status bar, fixed directly below the form. */
#status {
    position: fixed;
    top: 26px;
    left: 0;
    z-index: 10;
    padding: 3px 6px;
    height: 20px;
    width: 100%;
}
/**
 * Shorthand for document.getElementById.
 * @param {string} id element id
 * @returns whatever document.getElementById returns for that id
 */
function gid (id) {
    var element = document.getElementById(id);
    return element;
}
/**
 * Create a function that joins all of its arguments into one string.
 * @param {string} [separator] text placed between the arguments
 *        (defaults to the empty string when omitted)
 * @returns {function(...*): string} joiner function
 */
function joint (separator) {
    var glue = (typeof separator === 'undefined') ? '' : separator;
    return function () {
        var parts = Array.prototype.slice.call(arguments);
        return parts.join(glue);
    };
}
/**
 * Escape the characters that are special in HTML text and attribute values.
 * @param {*} str value to escape; null/undefined yield '', any other
 *        non-string value is converted with String() first (the scraped
 *        records do not always carry every field)
 * @returns {string} text with & < > " ' replaced by character references
 */
function escapeHTML (str) {
    if (str === null || typeof str === 'undefined') return '';
    return String(str)
        .replace(/&/g, '&amp;')   // must run first so later entities are not re-escaped
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
/**
 * Holder for the pieces needed to send a search request.
 * Copies the keyword, mode and ws properties of `options` onto the instance.
 * @param {Object} options object providing keyword, mode and ws
 */
function SendMessage (options) {
    var fields = ['keyword', 'mode', 'ws']
      , i
      ;
    for (i = 0; i < fields.length; i += 1) {
        this[fields[i]] = options[fields[i]];
    }
}
/**
 * Renderer for one batch of search results.
 * @param {*} list   result records to render
 * @param {*} output container that receives the rendered items
 */
function View (list, output) {
    var view = this;
    view.list = list;
    view.output = output;
}
(function (vp) {
    /**
     * Render every record of this.list as a <span class="inline-block"> tile
     * (title, thumbnail, circle) and insert the tiles at the top of
     * this.output, keeping the records' original order.
     * Does nothing when the list is missing or empty.
     */
    vp.buildList = function () {
        var list = this.list
          , output = this.output
          ;

        if (!list || !list.length) return;

        // Text content: tolerate missing/non-string fields before escaping.
        function text (value) {
            return escapeHTML(value === null || typeof value === 'undefined' ? '' : String(value));
        }

        // Attribute value: neutralise the characters that could close the
        // quoted attribute or open a tag. "&" is left alone so URLs behave
        // exactly as before.
        function attr (value) {
            return String(value === null || typeof value === 'undefined' ? '' : value)
                .replace(/"/g, '&quot;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;');
        }

        function link (url, innerHTML) {
            return '<a href="' + attr(url) + '" target="_blank">' + innerHTML + '</a>';
        }

        function image (url) {
            return '<img src="' + attr(url) + '" class="item-img" />';
        }

        // Work on a copy: reverse() would otherwise reorder the caller's array.
        list.slice().reverse().forEach(function (li) {
            var span = document.createElement('span')
              , title = li.urlOfTitle ? link(li.urlOfTitle, text(li.title)) : text(li.title)
              , picture = li.urlOfTitle ? link(li.urlOfTitle, image(li.urlOfThumbnail)) : image(li.urlOfThumbnail)
              , circle = li.urlOfCircle ? link(li.urlOfCircle, text(li.circle)) : text(li.circle)
              ;
            span.className = 'inline-block';
            span.innerHTML = [ title, '<br />', picture, '<br />', circle ].join('');
            // Inserting each reversed item first restores the original order.
            output.insertBefore(span, output.childNodes[0]);
        });
    };
})(View.prototype);
/**
 * Page start-up: open the WebSocket to /_hippie/ws on the current host,
 * wire the search form to it and show the connection state in #status.
 */
window.onload = function (e) {
    var loc = window.location
        // Use wss:// on https pages; ws:// everywhere else (as before).
      , scheme = (loc.protocol === 'https:') ? 'wss' : 'ws'
      , ws_path = scheme + '://' + loc.host + '/_hippie/ws'
      , ws = new WebSocket(ws_path)
      , $status = gid('status')
      , $list = gid('list')
      , message = new SendMessage({
            keyword : gid('keyword')
          , mode : gid('mode')
          , ws : ws
        })
      ;

    // Show a connection state in the status bar.
    function showStatus (background, color, label) {
        $status.style.backgroundColor = background;
        $status.style.color = color;
        $status.innerHTML = label;
    }

    // Send the current keyword/mode as JSON, then clear and refocus the input.
    message.send = function () {
        var keyword = message.keyword.value
          , mode = message.mode.options[message.mode.selectedIndex].value
          , json
          ;
        if (keyword === '') return;
        json = JSON.stringify({
            keyword : keyword // encodeURIComponent(keyword)
          , mode : mode
        });
        message.ws.send(json);
        message.keyword.value = '';
        message.keyword.focus();
    };

    // Render a received result batch; messages without a non-empty list are ignored.
    message.receive = function (data) {
        var view;
        data = JSON.parse(data);
        if (data.shop && data.list && data.list.length && data.list.length > 0) {
            view = new View(data.list, $list);
            view.buildList();
        }
    };

    gid('f').onsubmit = function () {
        message.send();
    };

    ws.onopen = function () {
        showStatus('#00ff00', '#000000', 'connected webSocket');
    };
    ws.onclose = function () {
        showStatus('#000000', '#cccccc', 'disconnected webSocket');
    };
    ws.onerror = function (e) {
        // Was "consoloe.log", which threw a ReferenceError before the
        // status bar could be updated.
        console.log(e);
        showStatus('#ff0000', '#cccccc', '! error');
    };
    ws.onmessage = function (e) {
        console.log(e.data);
        message.receive(e.data);
    };
};
<!doctype html>
<!-- Index page template: the title and the mode options are template variables. -->
<head>
<meta charset="utf-8" />
<link rel="stylesheet" href="/css/index.css" type="text/css" />
<script src="/js/index.js"></script>
<title><% title %></title>
</head>
<body>
<div id="main">
<h1 id="title">Meta::Doujin::Search</h1>
<!-- Search form; submission is handled by the onsubmit handler set in index.js. -->
<form id="f" action="javascript:void(0);">
<input type="text" id="keyword" />
<select id="mode">
<% FOREACH val = options %>
<option value="<% val %>"><% val %></option>
<% END %>
</select>
<button type="submit">post</button>
</form>
<!-- Connection status text, updated by index.js. -->
<div id="status">websocket is not connected yet.</div>
<!-- Result tiles are inserted here by index.js. -->
<section id="list"></section>
</div>
</body>
@ishiduca
Copy link
Author

plackup -s Twiggy

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment