#!/usr/bin/python
# Suppose you have, in InoReader, a folder "images" with a bunch of Tumblr streams.
# Suppose you want to skip everything in those streams *except* for large JPEGs which
# you have not seen before.
##
# InoReader doesn't have a feature for that.
# This is how to do it.
#
# This needs an enhanced librssreader, from github.com/smurfix/librssreader
##
# You need your InoReader username+password.
# If you normally log in via Google or whatever: click the wheel (top right), then
# "Profile", then "edit profile" to set them.
#
# You need an InoReader application ID.
# https://www.inoreader.com/developers/app-auth
#
# You need a Tumblr OAuth (or "consumer") key.
# https://www.tumblr.com/oauth/apps
#
# You need a Tumblr console key.
# https://api.tumblr.com/console/calls/user/info
#
# Enter all of these in the placeholders below.
# Then simply run this program. Keep the "reader.db" file; it remembers what you have already seen.
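#
# Rough setup sketch (assumed, not spelled out in this gist): this is Python 2 code
# (anydbm, urlparse), so run it with a Python 2 interpreter, and something like
#   pip install pytumblr
#   pip install git+https://github.com/smurfix/librssreader.git   # assumes the fork is pip-installable
# should pull in the two non-stdlib dependencies.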
from __future__ import print_function
from librssreader.inoreader import RssReader, ClientAuthMethod, Item
from pprint import pprint
import sys
import anydbm
from pytumblr import TumblrRestClient
from urlparse import urlparse

class KillMe(Exception):
    pass

db = anydbm.open("reader.db", 'c')
INO_TAG = "images"

auth = ClientAuthMethod('INO_LOGIN', 'INO_PASS', 'INO_APPID', 'INO_APPSECRET')
reader = RssReader(auth)
tumblr = TumblrRestClient("TUMBLR_CONSOLE_KEY", "TUMBLR_CONSOLE_SECRET", 'TUMBLR_OAUTH_TOKEN', 'TUMBLR_OAUTH_VERIFIER')
# First collect our InoReader data. This is a bit expensive if you have many feeds.
if not reader.buildSubscriptionList():
    raise RuntimeError("Could not build the subscription list")

cats = reader.getCategories()
cat = {}
for c in cats:
    cat[c.label] = c
c = cat[INO_TAG]

cont = None  # continuation key for the article list
try:  # get the timestamp where we left off last time
    ot = int(db['next_ts'])//1000000 - 5  # timestampUsec -> seconds, with 5s of slack
except KeyError:
    ot = None
try:  # sequence counter for marked-as-read entries, so that I can undo if necessary (rough undo sketch below)
    marked = int(db['marked'])
except KeyError:
    marked = 0
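
# Rough undo helper, a sketch only (nothing below calls it): it walks the marked:<n>
# keys written in the loop further down and removes the read state again. The 'r'
# (remove) action is an assumption mirroring the 'a' (add) action used below; check
# your librssreader fork before relying on it.
def undo_marked():
    if 'marked' not in db:
        return
    for i in range(int(db['marked'])):
        key = 'marked:' + str(i)
        if key in db:
            reader._modifyItemTag(db[key], 'r', 'user/-/state/com.google/read')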
# Now for the interesting bit
while True:
    # grab the next N articles, oldest-first because that's the way I read
    r = reader.getFeedContent(c, excludeRead=True, continuation=cont, loadLimit=20, since=ot, until=None, oldest_first=True)
    for x in r['items']:
        # Use this ID to determine whether this item has already been processed
        xid = ('seen:' + x['id']).encode('utf-8')
        if xid in db:
            print("SKIP", x['id'], x['timestampUsec'])
            continue
        # A few basic checks and dereferences
        p = x['alternate']
        assert len(p) == 1
        p = p[0]
        #print(p['href'])
        scheme, netloc, path, params, query, fragment = urlparse(p['href'])
        try:
            # Assume anything not matching isn't from Tumblr
            if scheme != "http" and scheme != "https":
                continue
            if not path.startswith("/post/"):
                continue
            post_id = int(path[6:])
            tp = tumblr.posts(blogname=netloc, id=post_id)
            try:
                tp = tp['posts']
            except KeyError:
                pprint(tp['meta'])
                if tp['meta']['status'] == 404:
                    # deleted. Skip it in the reader; we can't see the content anyway.
                    raise KillMe
                continue
            # We retrieve one post, therefore there is one post.
            assert len(tp) == 1, tp
            tp = tp[0]
            if tp['type'] != "photo":
                # Not an image? DIE.
                raise KillMe
            unseen = False
            for ph in tp['photos']:
                # Now check if there's a large …
                ph = ph['original_size']
                if ph['height'] < 500 and ph['width'] < 500:
                    continue
                ph = ph['url']
                # … JPEG or PNG image …
                if ph.lower().endswith('.gif'):
                    continue
                assert ph.lower().endswith('.jpg') or ph.lower().endswith('.jpeg') or ph.lower().endswith('.png'), ph
                # … which we have not seen before.
                pk = ('tseen:' + ph).encode('utf-8')
                if pk in db:
                    continue
                db[pk] = p['href']
                unseen = True  # Yay!
                break
            if not unseen:
                raise KillMe
        except KillMe:
            # Remember what we killed off
            db['marked:' + str(marked)] = x['id']
            marked += 1
            db['marked'] = str(marked)
            reader._modifyItemTag(x['id'], 'a', 'user/-/state/com.google/read')
            print("MARKED:", x['id'])
        finally:
            # Done with this entry, mark it as processed
            db[xid] = db['next_ts'] = x['timestampUsec']
    # base next-to-process on the continuation ID we got, not the timestamp
    ot = None
    try:
        cont = r['continuation']
    except KeyError:
        break  # done
tumblr-rss-fullsize.user.js (the companion userscript):
// ==UserScript==
// @id            inoreainoreader-full-tumblr@smurf.noris.de
// @name          Fullsize images for Tumblr RSS
// @version       1.6.3
// @namespace     http://netz.smurf.noris.de/rssimage
// @url           http://netz.smurf.noris.de/rssimage
// @updateURL     http://netz.smurf.noris.de/rssimage/tumblr-rss-fullsize.user.js
// @downloadURL   http://netz.smurf.noris.de/rssimage/tumblr-rss-fullsize.user.js
// @description   Use full-size images for Tumblr posts in InoReader
// @require       http://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js
// @include       http://www.inoreader.com
// @include       http://www.inoreader.com/*
// @include       https://www.inoreader.com
// @include       https://www.inoreader.com/*
// @match         http://www.inoreader.com
// @match         http://www.inoreader.com/*
// @match         https://www.inoreader.com
// @match         https://www.inoreader.com/*
// ==/UserScript==
(function() {
    var app_id = "YOUR_TUMBLR_APP_ID";
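    // Presumably the same Tumblr "consumer" (OAuth application) key as in the Python
    // script above, from https://www.tumblr.com/oauth/apps; the Tumblr v2 API takes it
    // as the api_key query parameter used further down.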
    // Keep Greasemonkey around for GM_* calls.
    // Credit: http://blog.maxaller.name/2009/01/accessing-gm_xmlhttprequest-from-event-handlers-bound-by-jquery/
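    // Why the queue (per the post above): GM_xmlhttpRequest cannot reliably be called
    // from handlers running in the page's context, so requests are parked here and
    // drained by a timer that runs in the userscript's own scope.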
    var ajaxQueue = [];
    var processAjaxQueue = function() {
        if (ajaxQueue.length > 0) {
            var aq = ajaxQueue;
            ajaxQueue = [];
            for (var ajax in aq) {
                var obj = aq[ajax];
                // http://diveintogreasemonkey.org/api/gm_xmlhttprequest.html
                GM_xmlhttpRequest(obj);
            }
        }
    };
    setInterval(processAjaxQueue, 200);
    var _get = function(url, callback) {
        var obj = {
            method: "GET",
            url: url,
            headers: {
                "User-Agent": "rss-get-full-image",
                "Accept": "application/json",
            },
            onload: function(details) {
                if (details.readyState == 4 && details.status == 200) {
                    callback($.parseJSON(details.responseText).response);
                } else {
                    console.log("URL_State", details);
                }
            },
        };
        ajaxQueue.push(obj);
    };
    var dashboard = function() {
        $("div.article_expanded[exp_seen!='Y']").each(function() {
            // Set a marker attribute so that each post is processed exactly once
            $(this).attr("exp_seen", "Y");
            try {
                var t = $(this).find("a.article_title_link");
                if (!t.length) return;
                console.log("A Title", t);
                var c = $(this).find("div.article_content");
                if (!c.length) return;
                var link_regexp = /https?:\/\/(.+)\.tumblr\.com\/post\/(\d+)/;
                var link_result = link_regexp.exec(t[0].href);
                if (!link_result) return;
                var url = "https://api.tumblr.com/v2/blog/" + link_result[1] + ".tumblr.com/posts?id=" + link_result[2] + "&api_key=" + app_id;
                _get(url, function(data) {
                    var p = data.posts[0].photos;
                    if (!p) return;  // not a photo post, nothing to swap in
                    var i = 0;
                    c.find("img").each(function() {
                        $(this).attr("src", p[i].original_size.url);
                        i = i + 1;
                    });
                });
            } catch (e) {
                console.log(e);
            }
        });
    };
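    // InoReader adds article nodes dynamically as you read and scroll, so the page is
    // simply re-scanned once a second; the exp_seen marker above keeps each post to one pass.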
    setInterval(dashboard, 1000);
})();