Skip to content

Instantly share code, notes, and snippets.

@lotrfan
Created May 21, 2013 19:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save lotrfan/28c4a266468bb7658e95 to your computer and use it in GitHub Desktop.
Save lotrfan/28c4a266468bb7658e95 to your computer and use it in GitHub Desktop.
Download Google Reader archives
#!/usr/bin/perl
use strict;
use warnings;
require LWP::UserAgent;
require HTTP::Cookies;
require HTML::Form;
use Term::ReadKey;
use URI::Escape;
# Feed to archive and the base name used for the output files.
our $FEED      = '';
our $FEED_NAME = 'feed';

# A single argument is the feed URL; with two or more arguments the first
# is the feed name and the second is the feed URL.
if (@ARGV >= 2) {
    ($FEED_NAME, $FEED) = @ARGV[0, 1];
}
elsif (@ARGV == 1) {
    $FEED = $ARGV[0];
}

# Number of items requested per page.
our $NUMBER_OF_ITEMS = 1000;

# Without a feed there is nothing to download: show the usage and stop.
unless ($FEED) {
    print "Usage: <script-name> <feed-url>\n",
          "or : <script-name> <feed-name> <feed-url>\n",
          "You may have to use either single-quotes for the feed-url,\n",
          "or take care using shell escapes.\n";
    exit(1);
}
# Endpoints and local state. %feed%, %num% and %cont% are placeholders that
# are substituted with the feed, the page size and the continuation token
# before a URL is requested.
our $LOGIN_URL     = 'https://accounts.google.com/ServiceLogin';
our $COOKIE_FILE   = 'cookies.dat';
our $CACHE_URL     = 'http://www.google.com/reader/atom/feed/%feed%?r=n&n=%num%';
our $CONTINUED_URL = join '', $CACHE_URL, '&c=%cont%';

# Credentials typed in at the login prompt.
our ($user, $pass);

# Cookie jar backed by $COOKIE_FILE, with autosave enabled.
our $CookieJar = HTTP::Cookies->new(autosave => 1, file => $COOKIE_FILE);

# Shared user agent: 100 second timeout, cookies kept in the jar above, and
# POST added to the request methods it is allowed to redirect.
my $ua = LWP::UserAgent->new(timeout => 100, cookie_jar => $CookieJar);
push @{ $ua->requests_redirectable }, 'POST';
#$ua->show_progress(1);

# Only log in when no cookie file exists yet.
login() unless -f $COOKIE_FILE;
# Download the feed archive page by page. Each page is written to
# "<feed-name>-<page>.xml"; the loop continues for as long as the page just
# fetched contains a <gr:continuation> token.
#
# NOTE(review): the feed URL is substituted into the request URL verbatim.
# URI::Escape is loaded by this file but never called, so a feed URL that
# contains '?' or '&' presumably has to be escaped by the caller. Confirm
# before adding uri_escape() here, since that would double-escape input that
# is already escaped.
our $f   = $FEED;
our $url = $CACHE_URL;
$url =~ s/%feed%/$f/;
$url =~ s/%num%/$NUMBER_OF_ITEMS/;

our $pageNum = 1;

# Zero-pad width of the page number in the output filenames. log() is the
# natural logarithm, so the default of 1000 items per page gives a width of 2.
our $padding = int(log(20000 / $NUMBER_OF_ITEMS));
our $response;

while ($url) {
    my $filename = sprintf "%s-%0" . $padding . "d.xml", $FEED_NAME, $pageNum;
    printf "Page %d => %s\n", $pageNum, $filename;

    $response = $ua->get($url);

    # Stop on an HTTP error instead of saving the error page as feed data
    # and ending the loop silently.
    if (!$response->is_success) {
        die sprintf "Failed to fetch page %d (%s): %s\n",
            $pageNum, $url, $response->status_line;
    }

    # Decode once; the same text is written to disk and searched below.
    my $content = $response->decoded_content;
    defined $content
        or die "Could not decode the response body of page $pageNum ($url)\n";

    open(my $fh, '>', $filename)
        or die "Cannot open '$filename' for writing: $!\n";
    binmode($fh, ':utf8');
    print {$fh} $content
        or die "Cannot write to '$filename': $!\n";
    # Buffered write errors only surface at close, so check it too.
    close($fh)
        or die "Cannot finish writing '$filename': $!\n";

    # An empty URL ends the loop unless this page names a continuation.
    $url = '';
    if ($content =~ m{<gr:continuation>(.*?)</gr:continuation>}) {
        my $cont = $1;
        $url = $CONTINUED_URL;
        $url =~ s/%feed%/$f/;
        $url =~ s/%num%/$NUMBER_OF_ITEMS/;
        $url =~ s/%cont%/$cont/;
    }

    $pageNum++;
}
# login()
#
# Prompt on the terminal for a username and a password (the password is read
# with echo disabled), fetch the page at $LOGIN_URL, fill the "Email" and
# "Passwd" inputs of the first form on that page and submit it through $ua.
#
# Takes no arguments. The entered values are stored in the package variables
# $user and $pass.
#
# Returns the HTTP::Response of the form submission.
#
# Dies when input ends before a value was entered, when the login page cannot
# be fetched, when the page has no form or the form lacks one of the expected
# inputs, or when the submission ends in an HTTP error.
#
# NOTE(review): a successful HTTP status does not necessarily mean that the
# credentials were accepted; that is not verified here.
sub login {
    print "Username: ";
    $user = ReadLine(0);
    defined $user or die "No username entered\n";
    chomp $user;

    print "Password: ";
    ReadMode('noecho');
    $pass = ReadLine(0);
    # Restore the terminal mode before anything below can die.
    ReadMode(0);
    print "\n";
    defined $pass or die "No password entered\n";
    chomp $pass;

    my $login_page = $ua->get($LOGIN_URL);
    if (!$login_page->is_success) {
        die sprintf "Cannot load login page %s: %s\n",
            $LOGIN_URL, $login_page->status_line;
    }

    # In scalar context parse() returns the first form of the page, or
    # undef when the page contains none.
    my $login_form = HTML::Form->parse($login_page)
        or die sprintf "No form found on login page %s\n", $LOGIN_URL;

    my %value_for = (Email => $user, Passwd => $pass);
    for my $field (qw(Email Passwd)) {
        my $input = $login_form->find_input($field)
            or die "Login form has no '$field' input\n";
        $input->value($value_for{$field});
    }

    my $login_response = $ua->request($login_form->click);
    if (!$login_response->is_success) {
        die sprintf "Login request failed: %s\n", $login_response->status_line;
    }

    return $login_response;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment