Created
April 29, 2013 14:34
-
-
Save onlyhavecans/5481947 to your computer and use it in GitHub Desktop.
Version of pocket 1.3 that will download EVERYTHING
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Pocket Calibre Recipe v1.3 | |
""" | |
import json
import operator
import re
import tempfile
import urllib
import urllib2

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
__license__ = 'GPL v3' | |
__copyright__ = ''' | |
2010, Darko Miletic <darko.miletic at gmail.com> | |
2011, Przemyslaw Kryger <pkryger at gmail.com> | |
2012, tBunnyMan <Wag That Tail At Me dot com> | |
''' | |
class Pocket(BasicNewsRecipe):
    """Calibre recipe that builds a single feed from a user's Pocket list.

    The recipe logs in through the legacy web form, fetches the article
    list from the read API, downloads each article body through the AJAX
    endpoint and, when ``mark_as_read_after_dl`` is set, archives the
    downloaded articles in ``cleanup``.
    """

    title = 'Pocket'
    __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
    description = (
        'Personalized news feeds. Go to getpocket.com to setup up '
        'your news. This version displays pages of articles from '
        'oldest to newest, with max & minimum counts, and marks articles '
        'read after downloading.'
    )
    publisher = 'getpocket.com'
    category = 'news, custom'

    # parse_index aborts the recipe when fewer articles than this come back.
    minimum_articles = 10
    # When True, cleanup() archives every article that parse_index collected.
    mark_as_read_after_dl = False
    # Sent as the `sort` request parameter.
    sort_method = 'oldest'
    # When not None, sent as the `tag` request parameter.
    only_pull_tag = None

    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    articles_are_obfuscated = True

    # NOTE(review): the API key is hard-coded in the source; consider moving
    # it to configuration.
    apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
    # NOTE(review): every endpoint below is plain HTTP while the username and
    # password travel in the query string -- confirm whether HTTPS can be used.
    index_url = u'http://getpocket.com'
    ajax_url = u'http://getpocket.com/a/x/getArticle.php'
    read_api_url = index_url + u'/v3/get'
    modify_api_url = index_url + u'/v3/send'
    legacy_login_url = index_url + u'/l'

    # Class-level default only; parse_index binds a fresh per-instance list
    # instead of mutating this shared one.
    articles = []

    @staticmethod
    def _quote(value):
        """Percent-encode ``value`` so it is safe inside a query string."""
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        return urllib.quote(str(value), safe='')

    def get_browser(self, *args, **kwargs):
        """
        Return a browser that is logged in through the legacy login form.

        We need to pretend to be a recent version of safari for the mac to
        prevent User-Agent checks. Pocket requires a username and password,
        so fail loudly (via ``user_error``) if either is missing from the
        config.
        """
        user_agent = (
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) '
            'AppleWebKit/533.19.4 (KHTML, like Gecko) '
            'Version/5.0.3 Safari/533.19.4'
        )
        br = BasicNewsRecipe.get_browser(self, user_agent=user_agent)
        if self.username is None or self.password is None:
            self.user_error("This Recipe requires authentication, please configured user & pass")
            return br
        br.open(self.legacy_login_url)
        br.select_form(nr=0)
        br['feed_id'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def get_auth_uri(self):
        """
        Return the authentication part of the query string.

        The result starts with ``&`` and carries the API key plus, when both
        are configured, the username and password. Every value is
        percent-encoded so characters such as ``&`` or ``#`` in a password
        cannot corrupt the request. Missing credentials are reported through
        ``user_error``.
        """
        uri = u'&apikey={0}'.format(self._quote(self.apikey))
        if self.username is None or self.password is None:
            self.user_error("Username or password is blank. Pocket no longer supports blank passwords")
        else:
            uri += u'&username={0}&password={1}'.format(
                self._quote(self.username),
                self._quote(self.password),
            )
        return uri

    def get_pull_articles_uri(self):
        """
        Return the part of the query string holding the get request settings.

        Always includes ``state``, ``contentType`` and ``sort``; ``tag`` is
        appended only when ``only_pull_tag`` is set. Every pair is prefixed
        with ``&``.
        """
        params = [
            (u'state', u'all'),
            (u'contentType', u'article'),
            (u'sort', self.sort_method),
        ]
        if self.only_pull_tag is not None:
            params.append((u'tag', self.only_pull_tag))
        return u''.join(
            u'&{0}={1}'.format(name, self._quote(value)) for name, value in params
        )

    def parse_index(self):
        """
        Fetch the article list and return it as a single calibre feed.

        Returns a list with one ``(feed title, articles)`` tuple, or an empty
        list when the API cannot be reached or answers with an HTTP error.
        When fewer than ``minimum_articles`` items come back, marking as read
        is disabled and the recipe is aborted through ``user_error``.
        """
        fetch_url = u"{0}?{1}{2}".format(
            self.read_api_url,
            self.get_auth_uri(),
            self.get_pull_articles_uri()
        )
        try:
            response = urllib2.urlopen(urllib2.Request(fetch_url))
            try:
                pocket_feed = json.load(response)['list']
            finally:
                response.close()
        except urllib2.HTTPError as e:
            # Log only the endpoint: fetch_url contains the user's credentials.
            self.log.exception("Pocket returned an error: {0}\nurl: {1}".format(e, self.read_api_url))
            return []
        except urllib2.URLError as e:
            self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, self.read_api_url))
            return []

        if len(pocket_feed) < self.minimum_articles:
            self.mark_as_read_after_dl = False
            self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed)))

        # NOTE(review): an empty result may come back as a JSON array rather
        # than an object -- confirm against the API. Guard so the iteration
        # below cannot fail on a non-dict.
        items = pocket_feed.items() if isinstance(pocket_feed, dict) else []

        # Build a fresh list so repeated runs and other instances never share
        # or accumulate entries through the class-level default.
        articles = [
            {
                'item_id': item_id,
                'title': item['resolved_title'],
                'date': item['time_updated'],
                'url': u'{0}/a/read/{1}'.format(self.index_url, item_id),
                'real_url': item['resolved_url'],
                'description': item['excerpt'],
                'sort': item['sort_id']
            }
            for item_id, item in items
        ]
        articles.sort(key=operator.itemgetter('sort'))
        self.articles = articles
        print(self.articles)
        return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]

    def get_obfuscated_article(self, url):
        """
        Download one article body and return the path of a temp file with it.

        The reader page at ``url`` is scanned for the ``formCheck`` token,
        which is posted together with the item id (the last path segment of
        ``url``) to the AJAX endpoint. The returned article text is written
        to a temporary file that is not deleted on close.
        """
        soup = self.index_to_soup(url)
        formcheck_script_tag = soup.find('script', text=re.compile("formCheck"))
        if formcheck_script_tag is None:
            # Report a readable error instead of failing on None below.
            self.user_error(u"Unable to find the formCheck token on {0}".format(url))
        form_check = formcheck_script_tag.split("=")[1].replace("'", "").replace(";", "").strip()
        article_id = url.split("/")[-1]
        data = urllib.urlencode({'itemId': article_id, 'formCheck': form_check})
        response = self.browser.open(self.ajax_url, data)
        article_html = json.load(response)['article']['article']
        # The temp file is binary; encode text explicitly so non-ASCII
        # articles do not fail on an implicit ASCII conversion.
        if not isinstance(article_html, bytes):
            article_html = article_html.encode('utf-8')
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(article_html)
        return tf.name

    def mark_as_read(self, mark_list):
        """
        Archive every item id in ``mark_list`` through the modify API.

        Nothing is sent when ``mark_list`` is empty. Returns ``None`` on
        success and an empty list when the request fails; failures are
        logged rather than raised.
        """
        if not mark_list:
            return None
        command = {
            'actions': [
                {'action': 'archive', 'item_id': article_id}
                for article_id in mark_list
            ]
        }
        mark_read_url = u'{0}?{1}'.format(
            self.modify_api_url,
            self.get_auth_uri()
        )
        try:
            request = urllib2.Request(mark_read_url, json.dumps(command))
            response = urllib2.urlopen(request)
            try:
                print(u'response = {0}'.format(response.info()))
            finally:
                response.close()
        except urllib2.HTTPError as e:
            self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e))
            return []
        except urllib2.URLError as e:
            self.log.exception("Unable to connect to getpocket.com's modify api: {0}".format(e))
            return []
        return None

    def cleanup(self):
        """Archive the downloaded articles when ``mark_as_read_after_dl`` is set."""
        if self.mark_as_read_after_dl:
            # self.articles is a list of dicts, so read the id by key.
            self.mark_as_read([article['item_id'] for article in self.articles])

    def default_cover(self, cover_file):
        """
        Create a generic cover for recipes that don't have a cover.

        This override adds the time to the cover. Returns True when the cover
        was written to ``cover_file`` and False when generation failed (the
        failure is logged).
        """
        try:
            from calibre.ebooks import calibre_cover
            title = self.title if isinstance(self.title, unicode) else \
                self.title.decode('utf-8', 'replace')
            date = strftime(self.timefmt)
            time = strftime('[%I:%M %p]')
            img_data = calibre_cover(title, date, time)
            cover_file.write(img_data)
            cover_file.flush()
        except Exception:
            # Cover generation is best effort, but do not swallow
            # KeyboardInterrupt/SystemExit the way a bare except would.
            self.log.exception('Failed to generate default cover')
            return False
        return True

    def user_error(self, error_message):
        """
        Report a fatal, user-facing error.

        Uses ``abort_recipe_processing`` when the base class provides it;
        otherwise logs the message and raises ``RuntimeError``.
        """
        if hasattr(self, 'abort_recipe_processing'):
            self.abort_recipe_processing(error_message)
        else:
            self.log.exception(error_message)
            raise RuntimeError(error_message)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment