Skip to content

Instantly share code, notes, and snippets.

@gradha
Created August 5, 2018 11:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gradha/83385ac91ad279c978d7647e4db5f56f to your computer and use it in GitHub Desktop.
Save gradha/83385ac91ad279c978d7647e4db5f56f to your computer and use it in GitHub Desktop.
Simple python cgi-bin proxy to fetch rss feeds from Europe (see https://github.com/ViennaRSS/vienna-rss/issues/1166)
#!/usr/bin/env python
print "Content-Type: text/xml"
print
# To use this script, put it in the cgi-bin directory of a host that supports
# Python 2.x. Then test it in your browser with a URL like
# https://your.host.com/cgi-bin/tumblr-gdpr-cgi-bin-proxy?u=hematocritico.tumblr.com/rss
# If that works, you should get some RSS XML back.
import cgi
import cgitb; cgitb.enable()
import urllib2
import re
import sys
def fetch_rss(partial_url):
    """Fetch a feed over HTTPS, accepting Tumblr's consent form if it shows up.

    The function requests ``"https://" + partial_url``.  If the response
    contains a ``tumblr_form_key`` value, it posts a consent payload to
    Tumblr's consent endpoint (sending that key in the ``x-tumblr-form-key``
    header) and then requests the feed a second time.  All requests share one
    cookie-aware opener, so cookies set by the consent response are sent with
    the final request.

    If the first response contains no form key, that response body is
    returned as-is instead of failing.

    Args:
        partial_url: Host and path of the feed without the scheme, e.g.
            ``"hematocritico.tumblr.com/rss"``.

    Returns:
        The raw response body of the feed request.

    Raises:
        urllib2.URLError: If any of the HTTP requests fails (this includes
            ``urllib2.HTTPError`` for error status codes).
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    import json
    from contextlib import closing

    # NOTE(review): partial_url is used verbatim to build the URL and no
    # host check is made here -- confirm callers only pass trusted values.
    feed_url = "https://" + partial_url
    consent_url = "https://www.tumblr.com/svc/privacy/consent"
    referer = ("https://www.tumblr.com/privacy/consent?redirect=" +
               urllib2.quote(feed_url, safe=""))
    form_key_pattern = re.compile(r'tumblr_form_key.*?content="([^"]*)')
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
    }

    # json.dumps escapes quotes and backslashes in the URL, which plain
    # string interpolation into a JSON template does not.
    consent_payload = json.dumps({
        "eu_resident": True,
        "gdpr_is_acceptable_age": True,
        "gdpr_consent_core": True,
        "gdpr_consent_first_party_ads": True,
        "gdpr_consent_third_party_ads": True,
        "gdpr_consent_search_history": True,
        "redirect_to": feed_url,
    })

    # The cookie processor keeps cookies between the requests below.
    opener = urllib2.build_opener(urllib2.HTTPSHandler(),
                                  urllib2.HTTPCookieProcessor())
    urllib2.install_opener(opener)

    # First request: look for the form key in whatever comes back.
    request = urllib2.Request(feed_url, headers=browser_headers)
    with closing(urllib2.urlopen(request)) as response:
        first_body = response.read()

    match = form_key_pattern.search(first_body)
    if match is None:
        # No form key in the response, so there is nothing to submit;
        # hand back what the feed URL returned.
        return first_body
    tumblr_form_key = match.group(1)

    # Second request: submit the consent payload.  The response body is not
    # needed; it is read only so the request is fully consumed.
    consent_headers = {
        "Content-Type": "application/json",
        "Origin": "https://www.tumblr.com",
        "x-tumblr-form-key": tumblr_form_key,
        "referer": referer,
    }
    consent_headers.update(browser_headers)
    request = urllib2.Request(consent_url, consent_payload, consent_headers)
    with closing(urllib2.urlopen(request)) as response:
        response.read()

    # Third request: fetch the feed again now that consent was submitted.
    request = urllib2.Request(feed_url, headers=browser_headers)
    with closing(urllib2.urlopen(request)) as response:
        return response.read()
arguments = cgi.FieldStorage()
print fetch_rss(arguments.getfirst("u", "nourl"))
@gyab
Copy link

gyab commented Jul 17, 2019

Thank you for this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment