Skip to content

Instantly share code, notes, and snippets.

@smurfix
Last active September 7, 2016 13:09
Show Gist options
  • Save smurfix/4617b8ae94a6a32d4225 to your computer and use it in GitHub Desktop.
Save smurfix/4617b8ae94a6a32d4225 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# Suppose you have, in InoReader, a folder "images" with a bunch of Tumblr streams.
# Suppose you want to skip everything in that stream *except* for large JPEGs which
# you have not seen before.
##
# InoReader doesn't have a feature for that.
# This is how to do it.
#
# This needs an enhanced librssreader, from github.com/smurfix/librssreader
##
# You need your InoReader username+password.
# If you logged in to them via Google or whatever, click the wheel (top right), then "profile", then "edit profile".
#
# You need an InoReader application ID.
# https://www.inoreader.com/developers/app-auth
#
# You need a tumblr oauth (or "consumer") key.
# https://www.tumblr.com/oauth/apps
#
# You need a tumblr console key.
# https://api.tumblr.com/console/calls/user/info
# Enter all of these in the placeholders below.
# Then simply run this program. Keep the "reader.db" file!
from __future__ import print_function
from librssreader.inoreader import RssReader, ClientAuthMethod, Item
from pprint import pprint
import sys
import anydbm
from pytumblr import TumblrRestClient
from urlparse import urlparse
class KillMe(Exception):
    """Raised while examining a feed item to signal that it should be discarded.

    The main loop catches this exception and marks the item as read in
    InoReader, recording the item ID in the database so the action can be
    undone later.
    """
# Persistent state shared between runs: per-item "seen" markers, the image
# URLs already encountered, the resume timestamp and the undo log of items
# marked as read. Flag 'c' creates the file if it does not exist yet.
db = anydbm.open("reader.db", 'c')

# Label of the InoReader folder whose items get filtered.
INO_TAG = "images"

# Replace the placeholder strings below with your own credentials
# (see the comments at the top of this file for where to obtain them).
auth = ClientAuthMethod(
    'INO_LOGIN',
    'INO_PASS',
    'INO_APPID',
    'INO_APPSECRET',
)
reader = RssReader(auth)
tumblr = TumblrRestClient(
    "TUMBLR_CONSOLE_KEY",
    "TUMBLR_CONSOLE_SECRET",
    'TUMBLR_OAUTH_TOKEN',
    'TUMBLR_OAUTH_VERIFIER',
)
# First collect our InoReader data. This is a bit expensive if you have many feeds.
if not reader.buildSubscriptionList():
    raise RuntimeError("No sub list build")

# Index the categories by label and pick the folder we want to filter.
# A missing folder surfaces as a KeyError on INO_TAG.
cats = reader.getCategories()
cat = {entry.label: entry for entry in cats}
c = cat[INO_TAG]

cont = None  # continuation key for the article list

# Timestamp where we left off last time. 'next_ts' holds the 'timestampUsec'
# value of the last processed item; it is scaled down by 10**6 (presumably
# microseconds -> seconds, going by the field name) and backed off by 5 so
# that the first request overlaps slightly with the previous run.
try:
    ot = int(db['next_ts']) // 1000000 - 5
except KeyError:
    ot = None  # first run: no lower bound

# Sequence counter for marked-as-read entries, so that they can be undone if necessary.
try:
    marked = int(db['marked'])
except KeyError:
    marked = 0
# Now for the interesting bit
def _tumblr_post_ref(href):
    """Split a Tumblr post URL into ``(blog_host, post_id)``.

    Returns None when *href* is not an http(s) URL whose path starts with
    ``/post/<number>``; anything not matching is assumed not to be from
    Tumblr. A trailing slug (``/post/123/some-title``) is ignored.
    """
    scheme, netloc, path, params, query, fragment = urlparse(href)
    if scheme not in ("http", "https"):
        return None
    if not path.startswith("/post/"):
        return None
    # Only the first path segment after "/post/" is the numeric ID.
    id_text = path[len("/post/"):].split("/", 1)[0]
    try:
        return netloc, int(id_text)
    except ValueError:
        return None


try:
    while True:
        # grab the next N articles, oldest-first because that's the way I read
        r = reader.getFeedContent(
            c,
            excludeRead=True,
            continuation=cont,
            loadLimit=20,
            since=ot,
            until=None,
            oldest_first=True,
        )
        for x in r['items']:
            # Use this ID to determine whether this item has already been processed
            xid = ('seen:' + x['id']).encode('utf-8')
            if xid in db:
                print("SKIP", x['id'], x['timestampUsec'])
                continue

            # A few basic checks and dereferences
            p = x['alternate']
            assert len(p) == 1
            p = p[0]

            try:
                # Assume anything not matching isn't from Tumblr
                ref = _tumblr_post_ref(p['href'])
                if ref is None:
                    continue
                blog_host, post_id = ref

                tp = tumblr.posts(blogname=blog_host, id=post_id)
                try:
                    tp = tp['posts']
                except KeyError:
                    # No 'posts' key: the response only carries error metadata.
                    pprint(tp['meta'])
                    if tp['meta']['status'] == 404:
                        # deleted. Skip in the reader, we can't see the content anyway.
                        raise KillMe
                    # Any other error: leave the item unread.
                    continue

                # We retrieve one post, therefore there is one post.
                assert len(tp) == 1, tp
                tp = tp[0]
                if tp['type'] != "photo":
                    # Not an image? DIE.
                    raise KillMe

                unseen = False
                for photo in tp['photos']:
                    # Now check if there's a large ...
                    size = photo['original_size']
                    if size['height'] < 500 and size['width'] < 500:
                        continue
                    image_url = size['url']
                    lowered = image_url.lower()
                    # ... JPEG or PNG image ...
                    if lowered.endswith('.gif'):
                        continue
                    assert lowered.endswith(('.jpg', '.jpeg', '.png')), image_url
                    # ... which we have not seen before.
                    pk = ('tseen:' + image_url).encode('utf_8')
                    if pk in db:
                        continue
                    # Remember which post first showed this image.
                    db[pk] = p['href']
                    unseen = True  # Yay!
                    break
                if not unseen:
                    raise KillMe
            except KillMe:
                # Remember what we killed off
                db['marked:' + str(marked)] = x['id']
                marked += 1
                db['marked'] = str(marked)
                reader._modifyItemTag(x['id'], 'a', 'user/-/state/com.google/read')
                print("MARKED:", x['id'])
            finally:
                # Done with this entry, mark as processed. This also runs for
                # the `continue` paths above and when an error propagates.
                db[xid] = db['next_ts'] = x['timestampUsec']

        # base next-to-process on the continuation ID we got, not the timestamp
        ot = None
        try:
            cont = r['continuation']
        except KeyError:
            break  # done
finally:
    # Close the database explicitly so that everything recorded so far is
    # written out, even when the loop is aborted by an error.
    db.close()
// ==UserScript==
// @id inoreainoreader-full-tumblr@smurf.noris.de
// @name Fullsize images for Tumblr RSS
// @version 1.6.3
// @namespace http://netz.smurf.noris.de/rssimage
// @url http://netz.smurf.noris.de/rssimage
// @updateURL http://netz.smurf.noris.de/rssimage/tumblr-rss-fullsize.user.js
// @downloadURL http://netz.smurf.noris.de/rssimage/tumblr-rss-fullsize.user.js
// @description Use full-size images for Tumblr posts shown in InoReader
// @require http://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js
// @include http://www.inoreader.com
// @include http://www.inoreader.com/*
// @include https://www.inoreader.com
// @include https://www.inoreader.com/*
// @match http://www.inoreader.com
// @match http://www.inoreader.com/*
// @match https://www.inoreader.com
// @match https://www.inoreader.com/*
// ==/UserScript==
(function() {
var app_id = "YOUR_TUMBLR_APP_ID";
// Keep Greasemonkey around for GM_* calls.
// Credit: http://blog.maxaller.name/2009/01/accessing-gm_xmlhttprequest-from-event-handlers-bound-by-jquery/
var ajaxQueue = [];
// Drain the pending request queue: every queued request-details object is
// handed to GM_xmlhttpRequest. The queue is swapped for a fresh array first,
// so anything pushed while draining is picked up by the next run.
var processAjaxQueue = function(){
    if (ajaxQueue.length === 0) {
        return;
    }
    var pending = ajaxQueue;
    ajaxQueue = [];
    // Indexed loop: for-in on an array also visits inherited enumerable
    // properties, and the original loop variable leaked as an implicit global.
    for (var i = 0; i < pending.length; i++) {
        // http://diveintogreasemonkey.org/api/gm_xmlhttprequest.html
        GM_xmlhttpRequest(pending[i]);
    }
};
setInterval(processAjaxQueue, 200);
// Queue a GET request for `url`. The request is not sent here; it is pushed
// onto ajaxQueue as a GM_xmlhttpRequest details object. On a completed
// response with status 200, the `response` member of the parsed JSON body is
// passed to `callback`; any other outcome is only logged to the console.
var _get = function(url,callback){
    var handleLoad = function(details) {
        var succeeded = (details.readyState == 4 && details.status == 200);
        if (!succeeded) {
            console.log("URL_State",details);
            return;
        }
        var parsed = $.parseJSON(details.responseText);
        callback(parsed.response);
    };
    var requestHeaders = {
        "User-Agent": "rss-get-full-image",
        "Accept": "application/json"
    };
    ajaxQueue.push({
        method: "GET",
        url: url,
        headers: requestHeaders,
        onload: handleLoad
    });
};
// Scan the page for expanded articles that have not been handled yet and,
// for those whose title links to a *.tumblr.com post, replace the images in
// the article body with the original-size photos reported by the Tumblr API.
var dashboard = function() {
    // Blog name must not span a "/" and the dots of ".tumblr.com" are literal.
    var link_regexp = /https?:\/\/([^\/]+)\.tumblr\.com\/post\/(\d+)/;
    $("div.article_expanded[exp_seen!='Y']").each(function(){
        var article = $(this);
        // Set a marker attribute so that each post is processed exactly once
        article.attr("exp_seen","Y");
        try {
            // A jQuery result object is always truthy, so test .length to
            // find out whether anything matched.
            var t = article.find("a.article_title_link");
            if (!t.length) return;
            console.log("A Title",t);
            var c = article.find("div.article_content");
            if (!c.length) return;
            var link_result = link_regexp.exec(t[0].href);
            if (!link_result) return;
            var url = "https://api.tumblr.com/v2/blog/"+link_result[1]+".tumblr.com/posts?id="+link_result[2]+"&api_key="+app_id;
            _get(url,function(data) {
                // This callback runs later, outside the try/catch above, so
                // guard against responses without a photo list
                // (e.g. text or video posts).
                var post = data && data.posts && data.posts[0];
                var p = post && post.photos;
                if (!p) return;
                c.find("img").each(function(i){
                    // More <img> elements than photos: stop replacing.
                    if (i >= p.length) return false;
                    $(this).attr("src",p[i].original_size.url);
                });
            });
        } catch(e) {
            console.log(e);
        }
    });
};
setInterval(dashboard, 1000);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment