Skip to content

Instantly share code, notes, and snippets.

@benevolent0505
Created December 12, 2016 06:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benevolent0505/fdab044e9e16525927b71f10a38adcf5 to your computer and use it in GitHub Desktop.
Save benevolent0505/fdab044e9e16525927b71f10a38adcf5 to your computer and use it in GitHub Desktop.
UEC休講情報スクレイピングスクリプト
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use FindBin;
use lib "$FindBin::Bin/../lib";
use Web::Scraper;
use URI;
use DateTime;
use JSON::XS;
use Redis;
use Redis::List;
use UEC::Kyuuko::Config;
use UEC::Kyuuko::Model::Lesson;
# Redis connection settings, read from the project configuration.
my $server = config->param('heroku_server');
my $password = config->param('heroku_password');

# Fail fast when the server address is missing: this script later issues
# FLUSHALL, and Redis->new would otherwise fall back to its default address
# and wipe whatever instance happens to live there.
# The password is deliberately not checked; an unauthenticated server is valid.
die "Config parameter 'heroku_server' is not set"
    unless defined $server && length $server;
# Scraping
# Each table row yields the text of its <td> cells under the key 'class'.
my $row_scraper = scraper {
    process 'td', 'class[]' => 'TEXT';
};

# The page yields every table row under 'classes' and the text of every
# <p> element under 'paragraph'.
my $scraper = scraper {
    process 'body table tr', 'classes[]' => $row_scraper;
    process 'p', 'paragraph[]' => 'TEXT';
};

my $url = URI->new('http://kyoumu.office.uec.ac.jp/kyuukou/kyuukou.html');
my $res = $scraper->scrape($url);
# Data shaping
# Lesson attributes, in the order the cells are consumed from each row.
my @lesson_fields = qw(class date period subject teacher desc);

my $rows = $res->{classes};
shift @$rows;    # discard the first row (presumably the table header)

# Build one Lesson per remaining row and keep its json_hash representation.
my @lessons = map {
    my $cells = $_->{class};
    UEC::Kyuuko::Model::Lesson->new(
        # Pair each field name with the next cell; a short row yields undef.
        map { ( $_ => shift @$cells ) } @lesson_fields
    )->json_hash;
} @$rows;
# Parse the "last updated" timestamp out of the last <p> scraped from the
# page. The text is expected to contain "YYYY/MM/DD hh:mm:ss".
## (Original author's note: there must be a better way to do this.)
my $updated_text = pop @{ $res->{paragraph} };
die "No <p> text was scraped; cannot determine the update timestamp"
    unless defined $updated_text;

# In scalar context the list assignment yields the number of captures, so a
# failed match is reported here instead of surfacing as an obscure
# DateTime validation error on undefined fields.
my ($year, $month, $day, $hour, $min, $sec) =
    $updated_text =~ m{(\d+)/(\d+)/(\d+)\s(\d+):(\d+):(\d+)}
    or die "Unrecognized update timestamp text: '$updated_text'";

my $dt = DateTime->new(
    year      => $year,
    month     => $month,
    day       => $day,
    hour      => $hour,
    minute    => $min,
    second    => $sec,
    time_zone => 'Asia/Tokyo',
);
# Save to Redis
# Connection arguments shared by the plain client and the tied list.
my @redis_args = (server => $server, password => $password);

my $redis = Redis->new(@redis_args);

# NOTE(review): FLUSHALL removes every key in every database of the server,
# not only the keys written below -- confirm the instance is dedicated to
# this script.
$redis->flushall;

# @classes is tied to the Redis list 'classes'; each push appends one
# JSON-encoded lesson.
tie my @classes, 'Redis::List', 'classes', @redis_args;
push @classes, JSON::XS::encode_json($_) for @lessons;

# Record the page's "last updated" time under the key 'updated'.
$redis->set('updated', $dt);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment