Last active
May 28, 2016 18:17
-
-
Save blue1st/68e57c0cd312babf13b7c274c3e0f6a1 to your computer and use it in GitHub Desktop.
超映画批評をスクレイピング
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl

=pod

=head1 NAME

fetch_movie_score.pl - print titles and scores scraped from movie.maeda-y.com

=head1 SYNOPSIS

    perl fetch_movie_score.pl 1 100 | sort -nrk3 -t $'\t'

=head1 DESCRIPTION

Fetches the pages numbered BEGIN_ID through END_ID (inclusive) and, for each
page whose C<< <h1 class="title"> >> heading carries a quoted title followed
by a score, prints one tab-separated line to STDOUT:

    id <TAB> title <TAB> score <TAB> url

Pages that cannot be fetched or that do not match are skipped silently.
Progress (the range, then each id) is reported on STDERR.  The synopsis shows
the intended usage: sort the result numerically by the score column,
highest first.

=cut

use strict;
use warnings;
use Try::Tiny;    # NOTE(review): not used by the code below; kept as-is
use Carp;
use LWP::Simple;
use Encode;

# Both ids are mandatory non-negative integers.  Without this check a missing
# or non-numeric argument only produced warnings and a bogus request for id 0.
my ($begin, $end) = @ARGV;
my $id_re = qr/\A\d+\z/;
die "usage: $0 BEGIN_ID END_ID\n"
    unless defined $begin
    && defined $end
    && $begin =~ $id_re
    && $end   =~ $id_re;

carp "$begin -> $end";

# Heading pattern: a title in Japanese corner brackets followed by "<digits>点".
# The title capture is non-greedy on purpose: the whole page is collapsed onto
# a single line below, so a greedy ".+" would run on to the LAST "』NN点" found
# anywhere in the page instead of stopping at the heading's own closer.
# This file has no "use utf8", so the literals here are UTF-8 bytes.
my $title_and_score_re
    = qr/<h1 class="title">\s*(?:『|「)(.+?)(?:』|」)\s*(\d+)点/;

# "+ 0" forces a numeric range even for zero-padded arguments such as "007".
# An empty range (BEGIN_ID > END_ID) simply does nothing.
for my $id ($begin + 0 .. $end + 0) {
    sleep 1;     # pause before every request (presumably to throttle the crawl)
    carp $id;    # progress on STDERR

    my $url  = sprintf 'http://movie.maeda-y.com/movie/%05d.htm', $id;
    my $html = get($url) or next;    # undef (or empty) on failure: skip this id

    # Convert to UTF-8 bytes so that the byte-literal pattern above matches and
    # the captured title is printed as UTF-8.
    $html = Encode::encode_utf8($html);

    # Collapse tabs/newlines to spaces so the heading can be matched as one line.
    # NOTE(review): the first alternative replaces a space with a space; it may
    # originally have been something else (e.g. an HTML entity) -- confirm.
    $html =~ s/ |\t|\n/ /g;

    my ($title, $score) = $html =~ $title_and_score_re or next;
    print "$id\t$title\t$score\t$url\n";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment