# xaicron/gist:95591

## pixiv.pl
# Fetch the images for the specified id from pixiv.

use strict;
use warnings;
use utf8;
use Web::Scraper;
use WWW::Mechanize;
use Encode qw/find_encoding decode_utf8/;
use File::Basename qw/basename/;
use File::Spec::Functions qw/catfile/;
use Getopt::Long qw/GetOptions/;
use Config::Pit qw/pit_get/;

# Default encoding for file names written to the local file system;
# can be overridden with -e.
my $default_encoding = 'cp932';

# Credentials come from Config::Pit; register them under 'www.pixiv.net'
# before running this script.
my $config = pit_get('www.pixiv.net');
my $username = $config->{username} or die 'username not found.';
my $password = $config->{password} or die 'password not found.';

# Characters that are not allowed in Windows file names, mapped to the
# characters that are written in their place.
my %win32_taboo = (
	'\\' => '￥',
	'/'  => '／',
	':'  => '：',
	'*'  => '＊',
	'?'  => '？',
	'"'  => '″',
	'<'  => '＜',
	'>'  => '＞',
	'|'  => '｜',
);

my $usage = "Usage: $0 [-i] id [-d save_directory] [-p max_page_number] [-e encode]";

# GetOptions returns false (after printing a warning) when the command line
# is malformed; stop there instead of running with half-parsed options.
GetOptions(\my %opt, 'id=i', 'directory=s', 'page=i', 'encode=s')
	or die $usage;

# The id may be given positionally or with -i; the positional form wins.
my $id = shift || $opt{id} || die $usage;
my $dir = $opt{directory} || $id;
my $max_page = $opt{page} || 1;

# find_encoding() returns undef for an unknown name. Fail here with a clear
# message rather than later on the first $enc->encode call.
my $enc_name = $opt{encode} || $default_encoding;
my $enc = find_encoding($enc_name) or die "unknown encoding: $enc_name";

# autocheck => 1 makes WWW::Mechanize die on any failed request.
my $mech = WWW::Mechanize->new(autocheck => 1);

# Log in through the form on the top page.
$mech->get('http://www.pixiv.net/index.php');
$mech->submit_form(
	fields => {
		pixiv_id => $username,
		pass     => $password,
	},
);

# Everything is saved into $dir, which is created on demand.
unless (-d $dir) {
	mkdir $dir or die "$dir $!";
}
chdir $dir or die "$dir $!";

# Walk the listing pages 1 .. $max_page.
get_pict($_, $mech) for 1 .. $max_page;

# get_pict($page, $mech)
#
# Downloads every illustration listed on one page of the gallery for the
# file-level $id and writes the images into the current directory.
#
#   $page - listing page number; appended to the URL as "&p=N" when true
#   $mech - WWW::Mechanize object used for every request
#
# Each file is named "<title>_<basename of the image URL>", with the title
# encoded through the file-level $enc. Files that already exist are skipped
# without being downloaded again. List items without a link or image are
# skipped with a warning. Dies when an output file cannot be opened or
# closed.
sub get_pict {
	my ($page, $mech) = @_;

	my $list_url = "http://www.pixiv.net/member_illust.php?id=" . $id;
	$list_url .= sprintf("&p=%d", $page) if $page;

	$mech->get($list_url);

	# One entry per list item: link (rewritten to the "big" view) and title.
	my $list_scraper = scraper {
		process '//*[@id="illust_c4"]/ul/li', 'list[]' => scraper {
			process 'a', url => [ '@href', sub {
				(my $url = $_->as_string) =~ s/mode=medium/mode=big/;
				return $url;
			} ];
			process 'div', title => 'TEXT';
		};
	};

	# Identical for every illustration page, so it is built only once.
	my $image_scraper = scraper {
		process '/html/body/div/a/img', image => [ '@src', sub { $_->as_string } ];
	};

	my $listing = $list_scraper->scrape($mech->content, $mech->uri);

	for my $row (@{ $listing->{list} || [] }) {
		unless (defined $row->{url}) {
			warn "skip: list item without a link\n";
			next;
		}

		$mech->get($row->{url});
		my $detail = $image_scraper->scrape($mech->content, $mech->uri);

		my $image_url = $detail->{image};
		unless (defined $image_url) {
			warn "skip $row->{url}: image not found\n";
			next;
		}

		my $title = defined $row->{title} ? decode_utf8($row->{title}) : '';
		if ($^O eq 'MSWin32') {
			$title = _win32_file_normalize($title);
		}
		else {
			# '/' would be taken as a directory separator by open() below,
			# so it has to be replaced on non-Windows systems as well.
			$title =~ s{/}{$win32_taboo{'/'}}g;
		}

		# Characters that $enc cannot represent are written as "U+XXXX".
		my $file = catfile($enc->encode($title, sub { sprintf "U+%04X", shift }) . "_" . basename($image_url));

		# Decide before fetching, so existing files cost no download.
		if (-f $file) {
			print "skip $file\n";
			next;
		}

		print "Download $file\n";
		$mech->get($image_url);

		open my $fh, '>', $file or die "$file $!";
		binmode $fh;
		print {$fh} $mech->content;
		# Buffered write errors only surface at close.
		close $fh or die "$file $!";
	}
}

# _win32_file_normalize($file_name)
#
# Returns a copy of $file_name in which every occurrence of
# \ / : * ? " < > | has been replaced by the value stored for that
# character in the file-level %win32_taboo table.
sub _win32_file_normalize {
	my ($name) = @_;
	$name =~ s{([\\/:*?"<>|])}{ $win32_taboo{$1} }ge;
	return $name;
}
	# pixivから指定したidの画像を取得する

	use strict;
	use warnings;
	use utf8;
	use Web::Scraper;
	use WWW::Mechanize;
	use Encode qw/find_encoding decode_utf8/;
	use File::Basename qw/basename/;
	use File::Spec::Functions qw/catfile/;
	use Getopt::Long qw/GetOptions/;
	use Config::Pit qw/pit_get/;

	# Default encoding for file names written to the local file system;
	# can be overridden with -e.
	my $default_encoding = 'cp932';

	# Credentials come from Config::Pit; register them under 'www.pixiv.net'
	# before running this script.
	my $config = pit_get('www.pixiv.net');
	my $username = $config->{username} or die 'username not found.';
	my $password = $config->{password} or die 'password not found.';

	# Characters that are not allowed in Windows file names, mapped to the
	# characters that are written in their place.
	my %win32_taboo = (
		'\\' => '￥',
		'/'  => '／',
		':'  => '：',
		'*'  => '＊',
		'?'  => '？',
		'"'  => '″',
		'<'  => '＜',
		'>'  => '＞',
		'|'  => '｜',
	);

	my $usage = "Usage: $0 [-i] id [-d save_directory] [-p max_page_number] [-e encode]";

	# GetOptions returns false (after printing a warning) when the command
	# line is malformed; stop there instead of running with half-parsed
	# options.
	GetOptions(\my %opt, 'id=i', 'directory=s', 'page=i', 'encode=s')
		or die $usage;

	# The id may be given positionally or with -i; the positional form wins.
	my $id = shift || $opt{id} || die $usage;
	my $dir = $opt{directory} || $id;
	my $max_page = $opt{page} || 1;

	# find_encoding() returns undef for an unknown name. Fail here with a
	# clear message rather than later on the first $enc->encode call.
	my $enc_name = $opt{encode} || $default_encoding;
	my $enc = find_encoding($enc_name) or die "unknown encoding: $enc_name";

	# autocheck => 1 makes WWW::Mechanize die on any failed request.
	my $mech = WWW::Mechanize->new(autocheck => 1);

	# Log in through the form on the top page.
	$mech->get('http://www.pixiv.net/index.php');
	$mech->submit_form(
		fields => {
			pixiv_id => $username,
			pass     => $password,
		},
	);

	# Everything is saved into $dir, which is created on demand.
	unless (-d $dir) {
		mkdir $dir or die "$dir $!";
	}
	chdir $dir or die "$dir $!";

	# Walk the listing pages 1 .. $max_page.
	get_pict($_, $mech) for 1 .. $max_page;

	# get_pict($page, $mech)
	#
	# Downloads every illustration listed on one page of the gallery for the
	# file-level $id and writes the images into the current directory.
	#
	#   $page - listing page number; appended to the URL as "&p=N" when true
	#   $mech - WWW::Mechanize object used for every request
	#
	# Each file is named "<title>_<basename of the image URL>", with the
	# title encoded through the file-level $enc. Files that already exist
	# are skipped without being downloaded again. List items without a link
	# or image are skipped with a warning. Dies when an output file cannot
	# be opened or closed.
	sub get_pict {
		my ($page, $mech) = @_;

		my $list_url = "http://www.pixiv.net/member_illust.php?id=" . $id;
		$list_url .= sprintf("&p=%d", $page) if $page;

		$mech->get($list_url);

		# One entry per list item: link (rewritten to the "big" view) and
		# title.
		my $list_scraper = scraper {
			process '//*[@id="illust_c4"]/ul/li', 'list[]' => scraper {
				process 'a', url => [ '@href', sub {
					(my $url = $_->as_string) =~ s/mode=medium/mode=big/;
					return $url;
				} ];
				process 'div', title => 'TEXT';
			};
		};

		# Identical for every illustration page, so it is built only once.
		my $image_scraper = scraper {
			process '/html/body/div/a/img', image => [ '@src', sub { $_->as_string } ];
		};

		my $listing = $list_scraper->scrape($mech->content, $mech->uri);

		for my $row (@{ $listing->{list} || [] }) {
			unless (defined $row->{url}) {
				warn "skip: list item without a link\n";
				next;
			}

			$mech->get($row->{url});
			my $detail = $image_scraper->scrape($mech->content, $mech->uri);

			my $image_url = $detail->{image};
			unless (defined $image_url) {
				warn "skip $row->{url}: image not found\n";
				next;
			}

			my $title = defined $row->{title} ? decode_utf8($row->{title}) : '';
			if ($^O eq 'MSWin32') {
				$title = _win32_file_normalize($title);
			}
			else {
				# '/' would be taken as a directory separator by open()
				# below, so it has to be replaced on non-Windows systems
				# as well.
				$title =~ s{/}{$win32_taboo{'/'}}g;
			}

			# Characters that $enc cannot represent are written as "U+XXXX".
			my $file = catfile($enc->encode($title, sub { sprintf "U+%04X", shift }) . "_" . basename($image_url));

			# Decide before fetching, so existing files cost no download.
			if (-f $file) {
				print "skip $file\n";
				next;
			}

			print "Download $file\n";
			$mech->get($image_url);

			open my $fh, '>', $file or die "$file $!";
			binmode $fh;
			print {$fh} $mech->content;
			# Buffered write errors only surface at close.
			close $fh or die "$file $!";
		}
	}

	# _win32_file_normalize($file_name)
	#
	# Returns a copy of $file_name in which every occurrence of
	# \ / : * ? " < > | has been replaced by the value stored for that
	# character in the file-level %win32_taboo table.
	sub _win32_file_normalize {
		my ($name) = @_;
		$name =~ s{([\\/:*?"<>\|])}{ $win32_taboo{$1} }ge;
		return $name;
	}