gradha/tumblr-gdpr-cgi-bin-proxy.py

## tumblr-gdpr-cgi-bin-proxy.py
#!/usr/bin/env python
# Emit the CGI response header before anything else is written to stdout.
# The bare `print` outputs the empty line that ends the header section.
print "Content-Type: text/xml"
print

# To use this script, put it in the cgi-bin directory of a host that supports
# Python 2.x. Then test it in your browser with a URL like
# https://your.host.com/cgi-bin/tumblr-gdpr-cgi-bin-proxy?u=hematocritico.tumblr.com/rss
# (the `u` parameter omits the scheme; the script prepends "https://").
# If everything works, you get the feed's RSS XML back.

import cgi
import cgitb; cgitb.enable()
import json
import re
import sys
import urllib2

def fetch_rss(partial_url):
    """Fetch a Tumblr feed, accepting the GDPR consent wall when it shows up.

    The feed is requested once. If the response carries a ``tumblr_form_key``
    meta tag, the consent form is posted with that key and the feed is
    requested again using the cookies collected along the way. If no form key
    is present, the first response is returned unchanged.

    Args:
        partial_url: Feed address without the scheme, for example
            ``hematocritico.tumblr.com/rss``. ``https://`` is prepended.

    Returns:
        The raw body of the feed response.

    Raises:
        urllib2.URLError: If any of the HTTP requests fails.
    """
    # NOTE(review): partial_url is taken from the query string by the caller,
    # so this fetches whatever https host is named. Consider restricting the
    # accepted hosts if the script is publicly reachable.
    REGEX = re.compile('tumblr_form_key.*?content="([^"]*)')
    FEED_URL = "https://" + partial_url
    CONSENT_URL = "https://www.tumblr.com/svc/privacy/consent"
    REFERER = ("https://www.tumblr.com/privacy/consent?redirect=" +
        urllib2.quote(FEED_URL, safe=""))
    CONTENT_TYPE = "application/json"
    USER_AGENT_KEY = 'User-Agent'
    USER_AGENT_DATA = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'

    # Serialise with json so quotes or backslashes in the URL cannot produce
    # a malformed payload.
    DATA = json.dumps({
        "eu_resident": True,
        "gdpr_is_acceptable_age": True,
        "gdpr_consent_core": True,
        "gdpr_consent_first_party_ads": True,
        "gdpr_consent_third_party_ads": True,
        "gdpr_consent_search_history": True,
        "redirect_to": FEED_URL,
    })

    # The cookie processor keeps the consent cookies between the requests.
    hsh = urllib2.HTTPSHandler()
    cookie_processor = urllib2.HTTPCookieProcessor()
    opener = urllib2.build_opener(hsh, cookie_processor)
    urllib2.install_opener(opener)

    # First attempt: either the feed itself or the consent page.
    request = urllib2.Request(FEED_URL, headers={
        USER_AGENT_KEY: USER_AGENT_DATA,
    })
    first_response = urllib2.urlopen(request).read()

    match = REGEX.search(first_response)
    if match is None:
        # No form key means no consent page was served; there is nothing to
        # accept, so hand back what we already have instead of crashing.
        return first_response
    tumblr_form_key = match.group(1)

    # Post the consent form; only the cookies it sets matter, not the body.
    request = urllib2.Request(CONSENT_URL, DATA, {
        "Content-Type": CONTENT_TYPE,
        "Origin": "https://www.tumblr.com",
        USER_AGENT_KEY: USER_AGENT_DATA,
        "x-tumblr-form-key": tumblr_form_key,
        "referer": REFERER,
    })
    urllib2.urlopen(request).read()

    # Second attempt, now with the consent cookies in the jar.
    request = urllib2.Request(FEED_URL, headers={
        USER_AGENT_KEY: USER_AGENT_DATA,
    })
    return urllib2.urlopen(request).read()

arguments = cgi.FieldStorage()
print fetch_rss(arguments.getfirst("u", "nourl"))