Skip to content

Instantly share code, notes, and snippets.

@sergioccrr
Last active December 19, 2015 03:58
Show Gist options
  • Save sergioccrr/5893376 to your computer and use it in GitHub Desktop.
Save sergioccrr/5893376 to your computer and use it in GitHub Desktop.
Download Google Reader feeds.
# -*- coding: utf-8 -*-
import requests
import urllib, time, sys
import string
# Characters allowed to survive in a generated file name: ASCII letters,
# digits, space and "-_()".  Built once so each call is a set lookup.
_FILENAME_SAFE_CHARS = frozenset("-_() %s%s" % (string.ascii_letters, string.digits))


def filename_string(feed):
    """Return *feed* with every character outside the safe set removed.

    Only ASCII letters, digits, spaces and the characters ``-_()`` are
    kept; everything else (``:``, ``/``, ``.``, non-ASCII, ...) is dropped
    rather than replaced, so the result can be empty.

    Args:
        feed: The feed identifier (the script passes a feed URL).

    Returns:
        The filtered string, with the surviving characters in their
        original order.
    """
    return ''.join(c for c in feed if c in _FILENAME_SAFE_CHARS)
def save(feed):
    """Download every page of a feed from Google Reader into JSON files.

    Pages are requested one after another, following the ``continuation``
    token found in each response.  The raw body of every response is
    written to ``<filename_string(feed)>-<NN>.json`` in the current
    directory, where ``NN`` is the zero-padded page number.

    Args:
        feed: The feed URL; it is URL-quoted and appended to the API path.

    Returns:
        None.  Progress and failures are reported on stdout.

    Raises:
        SystemExit: With status 1 when the HTTP request fails or the
            response body is not valid JSON.
    """
    # ``quote_plus`` lives in ``urllib`` on Python 2 and in ``urllib.parse``
    # on Python 3; resolve it locally so the block works on both.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    # The URL does not change between pages, so build it once.
    gurl = 'http://www.google.es/reader/api/0/stream/contents/feed/%s' % quote_plus(feed)

    continuation = None  # token for the next page; None on the first request
    page = 0
    while True:
        page += 1
        # NOTE(review): parameter meanings are not documented here; 'n'
        # looks like the page size and 'ck' like a cache-busting
        # timestamp -- confirm against the API before changing them.
        params = {
            'r': 'n',
            'n': '20',
            'ck': int(time.time()),
            'client': 'scroll',
        }
        if continuation:
            params['n'] = 40
            params['c'] = continuation

        # Separate try blocks so each handler only sees the call it guards.
        try:
            # A timeout keeps a stalled connection from hanging the script.
            r = requests.get(gurl, params=params, timeout=30)
        except requests.RequestException:
            print('Error (RequestException)')
            sys.exit(1)

        if r.status_code == 404:
            print(' [Fail] Este feed no está en Google Reader')
            break

        try:
            j = r.json()
        except ValueError:
            print('Error (JSON)')
            sys.exit(1)

        filename = '%s-%s.json' % (filename_string(feed), str(page).zfill(2))
        # ``r.content`` is raw bytes, so write in binary mode.
        with open(filename, 'wb') as f:
            f.write(r.content)

        # A missing *or empty* token ends the download; an empty one would
        # otherwise make the loop request the first page again forever.
        continuation = j['continuation'] if 'continuation' in j else None
        if not continuation:
            print(' [Ok] %s archivo(s) descargado(s)' % page)
            break
# Script body: read the list of feeds from feeds.txt (one per line) and
# download each of them with save().
try:
    # The context manager closes the file even if reading fails.
    with open('feeds.txt') as feed_list:
        feeds = feed_list.read().splitlines()
except IOError:
    print('No se ha podido cargar la lista de feeds en feeds.txt')
    # Non-zero status so callers can tell the run failed.
    sys.exit(1)

for feed in feeds:
    # Drop surrounding whitespace so indented or padded lines still work
    # and whitespace-only lines count as blank.
    feed = feed.strip()
    # Skip blank lines and '#' comment lines.
    if (not feed) or feed.startswith('#'):
        continue
    print('* Descargando %s' % feed)
    save(feed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment