Skip to content

Instantly share code, notes, and snippets.

@xaicron
Created March 7, 2010 14:30
Show Gist options
  • Save xaicron/324387 to your computer and use it in GitHub Desktop.
Save xaicron/324387 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use autodie;
use Config::Pit qw/pit_get/;
use Web::Scraper qw/scraper process/;
use WWW::Mechanize;
use LWP::Simple qw/mirror/;
use File::Basename qw/basename/;
$|++;
my $id = shift || die 'Usage: get_twitpic user_id';
my $conf = pit_get 'twitter.com';
my $uri_base = 'http://twitpic.com';
mkdir $id unless -d $id;
chdir $id;
my $mech = WWW::Mechanize->new(autocheck => 1);
$mech->get('http://twitpic.com/');
$mech->submit_form(
fields => {
username => $conf->{username},
password => $conf->{password},
},
);
my $list = scraper {
process '//*[@class="profile-photo-img"]/a', 'links[]' => '@href';
process '//*[@id="profile-photos"]/*/a[@class="nav"]', 'navi_urls[]' => '@href';
process '//*[@id="profile-photos"]/*/a[@class="nav"]', 'navi_texts[]' => 'TEXT';
};
my $image = scraper {
process '//*[@id="photo"]/img', 'img' => [ '@src', sub { "$_[0]" } ];
};
main();
exit;
sub main {
my $res = get_images("$uri_base/photos/$id");
while (my $path = get_next_url($res)) {
$res = get_images("$uri_base/$path");
}
}
sub get_images {
my $url = shift;
$mech->get($url);
my $res = $list->scrape($mech->content);
for my $src (@{$res->{links}}) {
$mech->get("$uri_base$src");
my $img = $image->scrape($mech->content)->{img};
_store($img);
}
return $res;
}
sub _store {
my $url = shift;
(my $file = basename $url) =~ s/\?.*//;
$file =~ s/-[\w+.]+-\w+//;
print STDERR "SKIP: $file\n" and return if -e $file;
print "getting $file ... ";
mirror $url, $file;
print "done.\n";
}
sub get_next_url {
my $res = shift;
my $urls = $res->{navi_urls};
my $texts = $res->{navi_texts};
return $urls->[-1] if @$urls >= 2 && $texts->[-1] =~ /OLDER/ && $urls->[-1] =~ /\?page=/;
return $urls->[0] if $texts->[0] =~ /OLDER/ && $urls->[0] =~ /\?page=/;
return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment