Skip to content

Instantly share code, notes, and snippets.

@sasezaki
Created March 27, 2009 11:24
Show Gist options
  • Save sasezaki/86649 to your computer and use it in GitHub Desktop.
Save sasezaki/86649 to your computer and use it in GitHub Desktop.
package WWW::Hanako;
use 5.010000;
use strict;
use warnings;
use Carp;
use LWP::UserAgent;
use WWW::Mechanize;
use Web::Scraper;
use Data::Dumper;
# No base classes: the package does not inherit from anything.
our @ISA = qw();
# Module version; also interpolated into the User-Agent string built in new().
our $VERSION = '0.05';
# Root of the site queried by today(); page names are appended to it.
my $HANAKO_BASE_URI = 'http://kafun.taiki.go.jp/';
# Construct a WWW::Hanako object.
#
# Accepts key/value options; every pair given is stored on the object and
# overrides the default of the same name:
#   area  - area code used in the query string by today() (default 0)
#   mst   - station code used in the query string by today() (default 0)
#   mech  - user agent object used for the requests; a WWW::Mechanize
#           instance is created when none is supplied
#   debug - when true, today() prints the URIs it requests (default 0)
#
# May be called on the class or on an existing instance; returns the new
# blessed hash reference.
sub new
{
    my ($proto, %args) = @_;
    my $class = ref($proto) || $proto;

    # Defaults first, caller-supplied options last so they win. The defaults
    # are literals: the former $_{area} / $_{mst} lookups read the global
    # hash %_ rather than the arguments.
    my $self = {
        area  => 0,
        mst   => 0,
        debug => 0,
        %args,
    };

    # Only build a user agent when the caller did not inject one.
    $self->{mech} //= WWW::Mechanize->new(agent=>"Net-Hanako/$VERSION");

    return bless($self, $class);
}
# Fetch and scrape today's observation table for the configured station.
#
# Builds the Hyou0.aspx query from $self->{mst} and $self->{area}, requests
# that page, then requests Hyou2.aspx with the same user agent and scrapes
# every row of the table with id "dgd1" except the first two.
#
# Returns a list of hash references, one per scraped row, with the keys
# hour, pollen, wd, ws, temp, prec and prec_bool holding the text of the
# row's 1st..7th <td>. Call it in list context: in scalar context the final
# return yields the number of rows instead.
#
# On a failed request, a non-200 response for the table page, or when no
# rows are found, a warning is issued with carp() and a bare return is made
# (empty list / undef).
# NOTE(review): WWW::Mechanize documents autocheck as on by default, in which
# case get() dies on failure before the success() checks below are reached;
# confirm which behavior is wanted.
sub today
{
    my $self = shift;
    my $mech = $self->{mech};

    my $path = sprintf("Hyou0.aspx?MstCode=%d&AreaCode=%02d",
        $self->{'mst'}, $self->{'area'});
    my $uri = $HANAKO_BASE_URI . $path;
    my $uri2 = $HANAKO_BASE_URI . "Hyou2.aspx";
    if($self->{debug}){
        print "uri: $uri\n";
        print "uri2: $uri2\n";
    }

    # First request selects the station; the original author's note says it
    # sets a cookie that the second request relies on. Stop early if it
    # failed, since the table page would not be meaningful without it.
    $mech->get($uri);
    if(!$mech->success()){
        carp("error");
        return;
    }

    # Second request returns the page that holds the data table.
    $mech->get($uri2);
    if(!$mech->success()){
        carp("error");
        return;
    }
    if($mech->status() != 200){
        carp("response code: " . $mech->status());
        return;
    }
    my $content = $mech->content();

    # Per-row scraper: one field per table cell, in column order.
    my $col = scraper {
        process '//td[1]','hour' => 'TEXT';
        process '//td[2]', 'pollen' => 'TEXT';
        process '//td[3]', 'wd' => 'TEXT';
        process '//td[4]', 'ws' => 'TEXT';
        process '//td[5]', 'temp' => 'TEXT';
        process '//td[6]', 'prec' => 'TEXT';
        process '//td[7]', 'prec_bool' => 'TEXT';
        result 'prec_bool', 'prec', 'temp', 'ws', 'wd','pollen', 'hour';
    };

    # Table scraper: apply the row scraper to every row but the first two.
    my $scraper = scraper {
        process '//table[@id="dgd1"]//tr[position()!=1 and position() !=2]', 'row[]' => $col;
    };

    my $scraped = $scraper->scrape($content, $uri2);
    if(!$scraped->{"row"}){
        carp("scrape error");
        return;
    }
    return @{$scraped->{'row'}};
}
# Return the most recent entry of today's table: the last element of the
# list produced by today(), or undef when today() returns nothing.
sub now {
    my ($self) = @_;
    my @rows = $self->today();
    return $rows[-1];
}
1;
use Data::Dumper;
# Demo: fetch and dump today's table for one station. Guarded with caller()
# so it only runs when this file is executed directly; loading the module
# with use/require no longer triggers network requests or output.
unless (caller) {
    my $hanako = WWW::Hanako->new(area=>3, mst=>51300200);
    print Dumper($hanako->today());
}
# Keep the file's final value true so that require succeeds.
1;
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment