Skip to content

Instantly share code, notes, and snippets.

@xaicron
Created April 15, 2009 03:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xaicron/95591 to your computer and use it in GitHub Desktop.
Save xaicron/95591 to your computer and use it in GitHub Desktop.
pixivから指定したユーザーIDの画像一覧を取得する
# pixivから指定したidの画像を取得する
use strict;
use warnings;
use utf8;
use Web::Scraper;
use WWW::Mechanize;
use Encode qw/find_encoding decode_utf8/;
use File::Basename qw/basename/;
use File::Spec::Functions qw/catfile/;
use Getopt::Long qw/GetOptions/;
use Config::Pit qw/pit_get/;
# Default encoding for the local filesystem (Windows Shift-JIS).
my $default_encoding = 'cp932';

# Credentials must be stored beforehand with Config::Pit under the
# 'www.pixiv.net' key.
my $config   = pit_get('www.pixiv.net');
my $username = $config->{username} or die 'username not found.';
my $password = $config->{password} or die 'password not found.';

# Characters forbidden in Windows filenames, mapped to full-width
# look-alikes so titles stay readable after sanitizing.
my %win32_taboo = (
    '\\' => '¥',
    '/'  => '/',
    ':'  => ':',
    '*'  => '*',
    '?'  => '?',
    '"'  => '″',
    '<'  => '<',
    '>'  => '>',
    '|'  => '|',
);

GetOptions(\my %opt, 'id=i', 'directory=s', 'page=i', 'encode=s');

# Member id may come from the first bare argument or from -i/--id.
my $id = shift || $opt{id}
    || die "Usage: $0 [-i] id [-d save_directory] [-p max_page_number] [-e encode]";
my $dir      = $opt{directory} || $id;
my $max_page = $opt{page}      || 1;
my $enc      = find_encoding($opt{encode} || $default_encoding);

# Log in; autocheck makes any failed HTTP request die immediately.
my $mech = WWW::Mechanize->new(autocheck => 1);
$mech->get('http://www.pixiv.net/index.php');
$mech->submit_form(
    fields => {
        pixiv_id => $username,
        pass     => $password,
    },
);

# Download everything into a per-user directory.
unless (-d $dir) {
    mkdir $dir or die "$dir $!";
}
chdir $dir or die "$dir $!";

# Walk the requested number of listing pages.
for my $page_no (1 .. $max_page) {
    get_pict($page_no, $mech);
}
# get_pict($page, $mech)
#
# Fetch one page of the member's illustration listing, follow each
# thumbnail link to its full-size ("mode=big") page, and download the
# image into the current directory.  Files that already exist locally
# are skipped.  Relies on file-level globals: $id (member id), $enc
# (filesystem encoding object) and, on Windows, %win32_taboo via
# _win32_file_normalize().
sub get_pict {
    my ($page, $mech) = @_;

    my $url = "http://www.pixiv.net/member_illust.php?id=" . $id;
    if ($page) {
        $url .= sprintf("&p=%d", $page);
    }
    $mech->get($url);

    # Each <li> in the listing holds a link to the medium-size view;
    # rewrite mode=medium to mode=big to reach the full-size image page.
    my $list_scraper = scraper {
        process '//*[@id="illust_c4"]/ul/li', 'list[]' => scraper {
            process 'a', url => [ '@href', sub {
                (my $url = $_->as_string) =~ s/mode=medium/mode=big/;
                return $url;
            } ];
            process 'div', title => 'TEXT';
        };
    };
    my $listing = $list_scraper->scrape($mech->content, $mech->uri);

    for my $row (@{ $listing->{list} }) {
        $mech->get($row->{url});

        # The big-image page wraps the <img> in a single anchor.
        my $image_scraper = scraper {
            process '/html/body/div/a/img', image => [ '@src', sub { $_->as_string } ];
        };
        my $image_res = $image_scraper->scrape($mech->content, $mech->uri);

        my $image_url = $image_res->{image};
        $mech->get($image_url);

        # Local filename: <encoded title>_<basename of image URL>;
        # characters unmappable in $enc are rendered as "U+XXXX".
        my $file = decode_utf8($row->{title});
        $file = _win32_file_normalize($file) if $^O eq 'MSWin32';
        $file = catfile($enc->encode($file, sub { sprintf "U+%04X", shift }) . "_" . basename($image_url));

        print "skip $file\n" and next if -f $file;
        print "Download $file\n";

        open my $fh, '>', $file or die "$file $!";
        binmode $fh;
        print {$fh} $mech->content;
        close $fh;
    }
}
# Replace characters that are illegal in Windows filenames with their
# full-width stand-ins from the file-level %win32_taboo table.
sub _win32_file_normalize {
    my ($name) = @_;
    $name =~ s{([\\/:*?"<>|])}{$win32_taboo{$1}}g;
    return $name;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment