Skip to content

Instantly share code, notes, and snippets.

@mizzy
Created December 30, 2009 17:43
Show Gist options
  • Save mizzy/266224 to your computer and use it in GitHub Desktop.
Save mizzy/266224 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use warnings;
use Web::Scraper;
use URI;
use YAML;
use DateTime;
use DateTime::Format::W3CDTF;
use Encode;
# Page to scrape.
my $uri = URI->new('http://www.clubcitta.co.jp/information.html');

# Fields pulled out of one matched "div.information" element:
#   id    - the element's id attribute
#   date  - text of the .date element
#   title - text of the .title element
#   body  - text of the .comment element
my $entry_scraper = scraper {
    process '.information', id    => '@id';
    process '.date',        date  => 'TEXT';
    process '.title',       title => 'TEXT';
    process '.comment',     body  => 'TEXT';
};

# Top-level scraper: every "div.information" block on the page is run through
# $entry_scraper and the results are collected, in page order, under the
# "entry" key as an array reference.
my $s = scraper {
    process 'div.information', 'entry[]' => $entry_scraper;
};
# Fetch the page and extract the entries.
my $feed = $s->scrape($uri);
my $w3c  = DateTime::Format::W3CDTF->new;

# The page's dates are matched as "month/day" with no year, so the year has
# to be inferred from the current year (UTC, DateTime's default for "now")
# and from the order of the entries.
# NOTE(review): this assumes the page lists entries newest first -- confirm
# against the live page.  Under that assumption the month never increases
# while walking down the list, except when the list steps back across a
# New Year.
my $current_year  = DateTime->now->year;
my $prev_month    = 0;    # month of the previous dated entry; 0 = none yet
my $subtract_year = 0;    # number of New Year boundaries crossed so far

for my $entry ( @{ $feed->{entry} || [] } ) {

    # The link does not depend on the date, so set it before any "next".
    $entry->{link} = $uri->as_string . "#$entry->{id}";

    my $raw_date = defined $entry->{date} ? $entry->{date} : '';
    my ( $month, $day ) = ( $raw_date =~ m!(\d+)/(\d+)! );

    # An entry without a recognisable date keeps its original text instead
    # of aborting the whole run.
    unless ( defined $month ) {
        warn "No month/day found in date '$raw_date'; leaving it unchanged\n";
        next;
    }

    # A month larger than the previous one means we stepped back over a
    # New Year: this entry and every later one is one more year in the past.
    $subtract_year++ if $prev_month && $month > $prev_month;
    $prev_month = $month;

    # Build the date in a single call.  Setting month and day one after the
    # other on "now" validates the intermediate date and dies when today's
    # day does not exist in the target month (e.g. running on the 31st).
    # The time zone is given explicitly so the formatted value keeps its
    # UTC "Z" suffix, as it did when the date was derived from "now".
    my $dt = eval {
        DateTime->new(
            year      => $current_year - $subtract_year,
            month     => $month,
            day       => $day,
            time_zone => 'UTC',
        );
    };
    unless ($dt) {
        warn "Invalid date '$raw_date'; leaving it unchanged: $@";
        next;
    }

    $entry->{date} = $w3c->format_datetime($dt);
}
# Feed-level metadata.
$feed->{link} = $uri->as_string;

# This file has no "use utf8", so the literal below is a byte string; decode
# it into characters (assumes the file itself is saved as UTF-8).
$feed->{title} = Encode::decode('utf8', 'クラブチッタ 最新情報');

# The title is a character string containing non-Latin-1 characters, so
# printing the dump without an output layer triggers "Wide character in
# print" and relies on perl's implicit encoding.  Encode explicitly as UTF-8.
binmode STDOUT, ':encoding(UTF-8)'
    or die "Cannot set UTF-8 output layer on STDOUT: $!";

# Emit the whole feed structure as YAML on standard output.
print Dump $feed;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment