#!/usr/bin/python
# Suppose you have, in InoReader, a folder "images" with a bunch of Tumblr streams.
# Suppose you want to skip everything in those streams *except* for large JPEGs which
# you have not seen before.
##
# InoReader doesn't have a feature for that.
# This is how to do it.
#
# This needs an enhanced librssreader, from github.com/smurfix/librssreader
##
# You need your InoReader username+password.
# If you normally log in via Google or whatever: click the wheel (top right), then
# "Profile", then "edit profile" to set them.
#
# You need an InoReader application ID.
# https://www.inoreader.com/developers/app-auth
#
# You need a Tumblr OAuth (or "consumer") key.
# https://www.tumblr.com/oauth/apps
#
# You need a Tumblr console key.
# https://api.tumblr.com/console/calls/user/info
#
# Enter all of these in the placeholders below.
# Then simply run this program. Keep the "reader.db" file; it remembers what you have already seen.
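#
# Rough setup sketch (assumed, not spelled out in this gist): this is Python 2 code
# (anydbm, urlparse), so run it with a Python 2 interpreter, and something like
#   pip install pytumblr
#   pip install git+https://github.com/smurfix/librssreader.git   # assumes the fork is pip-installable
# should pull in the two non-stdlib dependencies.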
from __future__ import print_function
from librssreader.inoreader import RssReader, ClientAuthMethod, Item
from pprint import pprint
import sys
import anydbm
from pytumblr import TumblrRestClient
from urlparse import urlparse

class KillMe(Exception):
    pass

db = anydbm.open("reader.db", 'c')
INO_TAG = "images"

auth = ClientAuthMethod('INO_LOGIN', 'INO_PASS', 'INO_APPID', 'INO_APPSECRET')
reader = RssReader(auth)
tumblr = TumblrRestClient("TUMBLR_CONSOLE_KEY", "TUMBLR_CONSOLE_SECRET", 'TUMBLR_OAUTH_TOKEN', 'TUMBLR_OAUTH_VERIFIER')
# First collect our InoReader data. This is a bit expensive if you have many feeds.
if not reader.buildSubscriptionList():
    raise RuntimeError("Could not build the subscription list")

cats = reader.getCategories()
cat = {}
for c in cats:
    cat[c.label] = c
c = cat[INO_TAG]

cont = None  # continuation key for the article list
try:  # get the timestamp where we left off last time
    ot = int(db['next_ts'])//1000000 - 5  # timestampUsec -> seconds, with 5s of slack
except KeyError:
    ot = None
try:  # sequence counter for marked-as-read entries, so that I can undo if necessary (rough undo sketch below)
    marked = int(db['marked'])
except KeyError:
    marked = 0
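
# Rough undo helper, a sketch only (nothing below calls it): it walks the marked:<n>
# keys written in the loop further down and removes the read state again. The 'r'
# (remove) action is an assumption mirroring the 'a' (add) action used below; check
# your librssreader fork before relying on it.
def undo_marked():
    if 'marked' not in db:
        return
    for i in range(int(db['marked'])):
        key = 'marked:' + str(i)
        if key in db:
            reader._modifyItemTag(db[key], 'r', 'user/-/state/com.google/read')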
# Now for the interesting bit
while True:
    # grab the next N articles, oldest-first because that's the way I read
    r = reader.getFeedContent(c, excludeRead=True, continuation=cont, loadLimit=20, since=ot, until=None, oldest_first=True)
    for x in r['items']:
        # Use this ID to determine whether this item has already been processed
        xid = ('seen:' + x['id']).encode('utf-8')
        if xid in db:
            print("SKIP", x['id'], x['timestampUsec'])
            continue
        # A few basic checks and dereferences
        p = x['alternate']
        assert len(p) == 1
        p = p[0]
        #print(p['href'])
        scheme, netloc, path, params, query, fragment = urlparse(p['href'])
        try:
            # Assume anything not matching isn't from Tumblr
            if scheme != "http" and scheme != "https":
                continue
            if not path.startswith("/post/"):
                continue
            post_id = int(path[6:])
            tp = tumblr.posts(blogname=netloc, id=post_id)
            try:
                tp = tp['posts']
            except KeyError:
                pprint(tp['meta'])
                if tp['meta']['status'] == 404:
                    # deleted. Skip it in the reader; we can't see the content anyway.
                    raise KillMe
                continue
            # We retrieve one post, therefore there is one post.
            assert len(tp) == 1, tp
            tp = tp[0]
            if tp['type'] != "photo":
                # Not an image? DIE.
                raise KillMe
            unseen = False
            for ph in tp['photos']:
                # Now check if there's a large …
                ph = ph['original_size']
                if ph['height'] < 500 and ph['width'] < 500:
                    continue
                ph = ph['url']
                # … JPEG or PNG image …
                if ph.lower().endswith('.gif'):
                    continue
                assert ph.lower().endswith('.jpg') or ph.lower().endswith('.jpeg') or ph.lower().endswith('.png'), ph
                # … which we have not seen before.
                pk = ('tseen:' + ph).encode('utf-8')
                if pk in db:
                    continue
                db[pk] = p['href']
                unseen = True  # Yay!
                break
            if not unseen:
                raise KillMe
        except KillMe:
            # Remember what we killed off
            db['marked:' + str(marked)] = x['id']
            marked += 1
            db['marked'] = str(marked)
            reader._modifyItemTag(x['id'], 'a', 'user/-/state/com.google/read')
            print("MARKED:", x['id'])
        finally:
            # Done with this entry, mark it as processed
            db[xid] = db['next_ts'] = x['timestampUsec']
    # base next-to-process on the continuation ID we got, not the timestamp
    ot = None
    try:
        cont = r['continuation']
    except KeyError:
        break  # done
tumblr-rss-fullsize.user.js (the companion userscript):
// ==UserScript==
// @id            inoreainoreader-full-tumblr@smurf.noris.de
// @name          Fullsize images for Tumblr RSS
// @version       1.6.3
// @namespace     http://netz.smurf.noris.de/rssimage
// @url           http://netz.smurf.noris.de/rssimage
// @updateURL     http://netz.smurf.noris.de/rssimage/tumblr-rss-fullsize.user.js
// @downloadURL   http://netz.smurf.noris.de/rssimage/tumblr-rss-fullsize.user.js
// @description   Use full-size images for Tumblr posts in InoReader
// @require       http://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js
// @include       http://www.inoreader.com
// @include       http://www.inoreader.com/*
// @include       https://www.inoreader.com
// @include       https://www.inoreader.com/*
// @match         http://www.inoreader.com
// @match         http://www.inoreader.com/*
// @match         https://www.inoreader.com
// @match         https://www.inoreader.com/*
// ==/UserScript==
(function() {
    var app_id = "YOUR_TUMBLR_APP_ID";
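    // Presumably the same Tumblr "consumer" (OAuth application) key as in the Python
    // script above, from https://www.tumblr.com/oauth/apps; the Tumblr v2 API takes it
    // as the api_key query parameter used further down.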
    // Keep Greasemonkey around for GM_* calls.
    // Credit: http://blog.maxaller.name/2009/01/accessing-gm_xmlhttprequest-from-event-handlers-bound-by-jquery/
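    // Why the queue (per the post above): GM_xmlhttpRequest cannot reliably be called
    // from handlers running in the page's context, so requests are parked here and
    // drained by a timer that runs in the userscript's own scope.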
    var ajaxQueue = [];
    var processAjaxQueue = function() {
        if (ajaxQueue.length > 0) {
            var aq = ajaxQueue;
            ajaxQueue = [];
            for (var ajax in aq) {
                var obj = aq[ajax];
                // http://diveintogreasemonkey.org/api/gm_xmlhttprequest.html
                GM_xmlhttpRequest(obj);
            }
        }
    };
    setInterval(processAjaxQueue, 200);
    var _get = function(url, callback) {
        var obj = {
            method: "GET",
            url: url,
            headers: {
                "User-Agent": "rss-get-full-image",
                "Accept": "application/json",
            },
            onload: function(details) {
                if (details.readyState == 4 && details.status == 200) {
                    callback($.parseJSON(details.responseText).response);
                } else {
                    console.log("URL_State", details);
                }
            },
        };
        ajaxQueue.push(obj);
    };
    var dashboard = function() {
        $("div.article_expanded[exp_seen!='Y']").each(function() {
            // Set a marker attribute so that each post is processed exactly once
            $(this).attr("exp_seen", "Y");
            try {
                var t = $(this).find("a.article_title_link");
                if (!t.length) return;
                console.log("A Title", t);
                var c = $(this).find("div.article_content");
                if (!c.length) return;
                var link_regexp = /https?:\/\/(.+)\.tumblr\.com\/post\/(\d+)/;
                var link_result = link_regexp.exec(t[0].href);
                if (!link_result) return;
                var url = "https://api.tumblr.com/v2/blog/" + link_result[1] + ".tumblr.com/posts?id=" + link_result[2] + "&api_key=" + app_id;
                _get(url, function(data) {
                    var p = data.posts[0].photos;
                    if (!p) return;  // not a photo post, nothing to swap in
                    var i = 0;
                    c.find("img").each(function() {
                        $(this).attr("src", p[i].original_size.url);
                        i = i + 1;
                    });
                });
            } catch (e) {
                console.log(e);
            }
        });
    };
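    // InoReader adds article nodes dynamically as you read and scroll, so the page is
    // simply re-scanned once a second; the exp_seen marker above keeps each post to one pass.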
    setInterval(dashboard, 1000);
})();