Skip to content

Instantly share code, notes, and snippets.

@Likk
Last active December 14, 2015 23:59
Show Gist options
  • Save Likk/5169432 to your computer and use it in GitHub Desktop.
Save Likk/5169432 to your computer and use it in GitHub Desktop.
Lingr スクレイピングやっつけ
package WWW::Lingr;
use strict;
use warnings;
use utf8;
use Carp;
use Encode;
use Try::Tiny;
use Web::Scraper;
use WWW::Mechanize;
use Class::Accessor::Lite(
new => 1,
rw => [ qw(room) ],
);
our $BASE_URL = q{http://lingr.com/};
our $ROOM_URL = q{room/__NAME__/archives};
sub mech {
my $self = shift;
$self->{__mech} ||= WWW::Mechanize->new;
}
sub get {
my $self = shift;
my $room = $self->room;
my $room_url = $ROOM_URL;
$room_url =~ s{__NAME__}{$room};
my $res = $self->mech->get($BASE_URL . $room_url);
$self->_parse($res->decoded_content());
}
sub _parse {
my $self = shift;
my $html = shift;
my $scraper = scraper {
process '//div[@id="right"]/div', 'data' => scraper {
process '//div[@class="timestamp"]', 'timestamp[]' => 'TEXT';
process '//span[@class="nickname"]', 'nickname[]' => 'TEXT';
process '//div[@class="decorated"]', 'description[]' => 'TEXT';
};
result 'data';
};
my $result = $scraper->scrape($html);
my $data = [];
for my $index (0..$#{$result->{nickname}}){
my $row = {
nickname => $result->{nickname}->[$index],
description => $result->{description}->[$index],
timestamp => $result->{timestamp}->[$index],
};
push @$data, $row;
}
return $data;
}
1;
=head1 name
WWW::Linger
=head1 SYNOPSIS
use strict;
use warnings;
use utf8;
use WWW::Lingr;
my $lingr = WWW::Lingr->new( room => q{perl_jp}) ;
$lingr->room('perl_jp');
warn YAML::Dump $linger->get;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment