Skip to content

Instantly share code, notes, and snippets.

@mtsukamoto
Created May 18, 2014 14:12
Show Gist options
  • Save mtsukamoto/bebe76f5ee740b41e23f to your computer and use it in GitHub Desktop.
Save mtsukamoto/bebe76f5ee740b41e23f to your computer and use it in GitHub Desktop.
use utf8;
use WWW::Mechanize;
use Encode qw(encode decode);
my $auth = {
email => $ENV{'BOOKSCAN_MAIL'},
password => $ENV{'BOOKSCAN_PASS'},
};
my $mech = WWW::Mechanize->new;
$mech->env_proxy();
my $url = 'https://system.bookscan.co.jp/history.php';
$mech->get( $url );
$mech->submit_form( fields => $auth );
my @details = &parse_history($mech);
foreach my $detail (@details) {
printf("[%s] %s\n", $detail->{'date'}, $detail->{'url'});
$mech->get($detail->{'url'});
my ($number, @pdfs) = &parse_detail($mech);
printf("[number:%s]\n", $number);
my $directory = sprintf("%s_%s", $number, $detail->{'date'});
printf("[directory:%s]\n", $directory);
if (not -d $directory) {
print "mkdir($directory)\n";
mkdir($directory, 0777);
}
foreach my $pdf (@pdfs) {
my $url = $pdf->{'url'};
my $file = sprintf("%s/%s", $directory, $pdf->{'title'});
# $file = encode('cp932', decode('UTF-8', $file));
$file = encode('cp932', $file);
next if (-e $file);
printf("%s\n->$file\n", $url);
$mech->get($url, ':content_file' => $file);
print "\n";
}
}
sub parse_history {
my $mech = shift;
my $content = $mech->content;
my @resutls = ();
my @rows = ($content =~ /<tr[^<>]*>(.*?)<\/tr>/gs);
foreach my $row (@rows) {
next unless ($row =~ /<td width=100>.*?<a href='(\Qbookdetail.php?hash=\E[^']+)'>/s);
my $result = { 'url' => 'https://system.bookscan.co.jp/' . $1 };
if ($row =~ /<td width=130 style="width:117px">((\d+)\D+(\d+)\D+(\d+)\D+)<\/td>/s) {
my ($string, $year, $month, $day) = ($1, $2, $3, $4);
$result->{'date'} = sprintf('%04d.%02d.%02d', $year, $month, $day);
$result->{'datestring'} = $string;
}
push(@resutls, $result);
}
return @resutls;
}
sub parse_detail {
my $mech = shift;
my $content = $mech->content;
my @resutls = ();
my @links = ($content =~ /<a href="\Qdownload.php?\E.+?\.pdf" class="downloading">[^<>]+?<\/a>/g);
my $number = $1 if ($content =~ /<input type="hidden" name="d" value="(.*?)">/);
foreach my $link (@links) {
next unless ($link =~ /<a href="(\Qdownload.php?\E.+?\.pdf)" class="downloading">([^<>]+?)<\/a>/);
my $result = {
'url' => 'https://system.bookscan.co.jp/' . $1,
'title' => $2
};
push(@resutls, $result);
}
return ($number, @resutls);
}
@mtsukamoto
Copy link
Author

Windows+Perl環境用の、Bookscanの依頼済み書籍のPDFをダウンロードするためのスクリプトです。

  • ログイン情報として環境変数BOOKSCAN_MAIL、BOOKSCAN_PASSを使います。
  • カレントディレクトリにPDFがダウンロードされます。
  • Windows以外の環境では「$file = encode('cp932', $file);」の部分を適切な文字コードにすれば動くかもしれません。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment