xaicron/gist:324387

## gistfile1.PL
#!/usr/bin/env perl

use strict;
use warnings;
use autodie;
use Config::Pit qw/pit_get/;
use Web::Scraper qw/scraper process/;
use WWW::Mechanize;
use LWP::Simple qw/mirror/;
use File::Basename qw/basename/;

$|++;

my $id = shift || die 'Usage: get_twitpic user_id';
my $conf = pit_get 'twitter.com';
my $uri_base = 'http://twitpic.com';
mkdir $id unless -d $id;
chdir $id;

my $mech = WWW::Mechanize->new(autocheck => 1);
$mech->get('http://twitpic.com/');
$mech->submit_form(
    fields => {
        username => $conf->{username},
        password => $conf->{password},
    },
);

my $list = scraper {
    process '//*[@class="profile-photo-img"]/a', 'links[]' => '@href';
    process '//*[@id="profile-photos"]/*/a[@class="nav"]', 'navi_urls[]' => '@href';
    process '//*[@id="profile-photos"]/*/a[@class="nav"]', 'navi_texts[]' => 'TEXT';
};

my $image = scraper {
    process '//*[@id="photo"]/img', 'img' => [ '@src', sub { "$_[0]" } ];
};

main();
exit;

sub main {
    my $res = get_images("$uri_base/photos/$id");
    while (my $path = get_next_url($res)) {
        $res = get_images("$uri_base/$path");
    }
}

sub get_images {
    my $url = shift;
    $mech->get($url);
    my $res = $list->scrape($mech->content);
    for my $src (@{$res->{links}}) {
        $mech->get("$uri_base$src");
        my $img = $image->scrape($mech->content)->{img};
        _store($img);
    }
    return $res;
}

sub _store {
    my $url = shift;
    (my $file = basename $url) =~ s/\?.*//;
    $file =~ s/-[\w+.]+-\w+//;
    print STDERR "SKIP: $file\n" and return if -e $file;
    print "getting $file ... ";
    mirror $url, $file;
    print "done.\n";
}

sub get_next_url {
    my $res = shift;
    my $urls = $res->{navi_urls};
    my $texts = $res->{navi_texts};
    return $urls->[-1] if @$urls >= 2 && $texts->[-1] =~ /OLDER/ && $urls->[-1] =~ /\?page=/;
    return $urls->[0] if $texts->[0] =~ /OLDER/ && $urls->[0] =~ /\?page=/;
    return;
}
	#!/usr/bin/env perl

	use strict;
	use warnings;
	use autodie;
	use Config::Pit qw/pit_get/;
	use Web::Scraper qw/scraper process/;
	use WWW::Mechanize;
	use LWP::Simple qw/mirror/;
	use File::Basename qw/basename/;

	$\|++;

	my $id = shift \|\| die 'Usage: get_twitpic user_id';
	my $conf = pit_get 'twitter.com';
	my $uri_base = 'http://twitpic.com';
	mkdir $id unless -d $id;
	chdir $id;

	my $mech = WWW::Mechanize->new(autocheck => 1);
	$mech->get('http://twitpic.com/');
	$mech->submit_form(
	fields => {
	username => $conf->{username},
	password => $conf->{password},
	},
	);

	my $list = scraper {
	process '//*[@class="profile-photo-img"]/a', 'links[]' => '@href';
	process '//[@id="profile-photos"]//a[@class="nav"]', 'navi_urls[]' => '@href';
	process '//[@id="profile-photos"]//a[@class="nav"]', 'navi_texts[]' => 'TEXT';
	};

	my $image = scraper {
	process '//*[@id="photo"]/img', 'img' => [ '@src', sub { "$_[0]" } ];
	};

	main();
	exit;

	sub main {
	my $res = get_images("$uri_base/photos/$id");
	while (my $path = get_next_url($res)) {
	$res = get_images("$uri_base/$path");
	}
	}

	sub get_images {
	my $url = shift;
	$mech->get($url);
	my $res = $list->scrape($mech->content);
	for my $src (@{$res->{links}}) {
	$mech->get("$uri_base$src");
	my $img = $image->scrape($mech->content)->{img};
	_store($img);
	}
	return $res;
	}

	sub _store {
	my $url = shift;
	(my $file = basename $url) =~ s/\?.*//;
	$file =~ s/-[\w+.]+-\w+//;
	print STDERR "SKIP: $file\n" and return if -e $file;
	print "getting $file ... ";
	mirror $url, $file;
	print "done.\n";
	}

	sub get_next_url {
	my $res = shift;
	my $urls = $res->{navi_urls};
	my $texts = $res->{navi_texts};
	return $urls->[-1] if @$urls >= 2 && $texts->[-1] =~ /OLDER/ && $urls->[-1] =~ /\?page=/;
	return $urls->[0] if $texts->[0] =~ /OLDER/ && $urls->[0] =~ /\?page=/;
	return;
	}