Skip to content

Instantly share code, notes, and snippets.

@lemon24
Created June 10, 2018 18:02
Show Gist options
  • Save lemon24/49930d39d3d336e0efda8218d1e30386 to your computer and use it in GitHub Desktop.
Save lemon24/49930d39d3d336e0efda8218d1e30386 to your computer and use it in GitHub Desktop.
"""
Scoping for https://github.com/lemon24/reader/issues/67 "Tumblr feeds not working".
Output looks like:
$ python3 tumblr.py
initial None
have tumblr redirect
{}
parse_urllib_for_cookies None
{}
parse_urllib_only None
{'.tumblr.com': {'/': {'pfg': ...}}}
parse_requests_for_cookies None
{'.tumblr.com': {'/': {'pfg': ...}}}
parse_requests_only ORIANART
"""
import urllib.request
import re
import json
import feedparser
import requests
# Feed used for the experiment: parsed directly at the bottom of the script
# and passed to every parse_* variant.
url = 'http://fox-orian.tumblr.com/rss'
# Endpoint the fill_cookie_jar_* helpers POST the consent JSON payload to.
CONSENT_FORM_SVC = 'https://www.tumblr.com/svc/privacy/consent'
def make_headers(consent_form_url, tumblr_form_key):
    """Build the extra HTTP headers that accompany the consent POST.

    Returns a new dict mapping 'Referer' to *consent_form_url* and
    'X-tumblr-form-key' to *tumblr_form_key*.
    """
    extra_headers = {}
    # anything starting with 'https://www.tumblr.com/' should work
    extra_headers['Referer'] = consent_form_url
    extra_headers['X-tumblr-form-key'] = tumblr_form_key
    return extra_headers
def make_json_data():
    """Return the consent payload: a fresh dict with every flag set to True."""
    consent_flags = (
        'eu_resident',
        'gdpr_consent_core',
        'gdpr_consent_first_party_ads',
        'gdpr_consent_search_history',
        'gdpr_consent_third_party_ads',
        'gdpr_is_acceptable_age',
    )
    # dict.fromkeys keeps the flags in the order listed above.
    return dict.fromkeys(consent_flags, True)
def extract_tumblr_form_key(text):
    """Return the tumblr form key embedded in the consent page HTML.

    The key is the ``content`` attribute of the
    ``<meta name="tumblr-form-key" id="tumblr_form_key" ...>`` tag.

    Args:
        text: HTML of the consent form page, as a str.

    Returns:
        The form key string.

    Raises:
        ValueError: if *text* contains no such meta tag.
    """
    match = re.search(
        r'<meta name="tumblr-form-key" id="tumblr_form_key" content="([^"]+)">',
        text,
    )
    # Explicit check rather than `assert`: asserts are stripped under
    # `python -O`, which would turn a missing tag into an AttributeError
    # on `None.group`.
    if match is None:
        raise ValueError('tumblr form key meta tag not found')
    return match.group(1)
def fill_cookie_jar_requests(session, consent_form_url):
    """Submit the consent form through *session*.

    GETs *consent_form_url*, extracts the form key from the page, then POSTs
    the consent payload as JSON to CONSENT_FORM_SVC. Both responses are
    asserted to have status 200. Returns nothing; any cookies set along the
    way end up in the session.
    """
    form_page = session.get(consent_form_url)
    assert form_page.status_code == 200

    form_key = extract_tumblr_form_key(form_page.text)
    consent_reply = session.post(
        CONSENT_FORM_SVC,
        json=make_json_data(),
        headers=make_headers(consent_form_url, form_key),
    )
    assert consent_reply.status_code == 200
def fill_cookie_jar_urllib(opener, consent_form_url):
    """Submit the consent form through a urllib *opener*.

    GETs *consent_form_url*, extracts the form key from the page, then POSTs
    the consent payload as JSON to CONSENT_FORM_SVC. Both responses are
    asserted to have status 200. Returns nothing; any cookies set along the
    way are collected by whatever cookie handler the opener was built with.

    Args:
        opener: a urllib OpenerDirector (or anything with a compatible
            ``open()``).
        consent_form_url: URL of the consent form page.
    """
    with opener.open(consent_form_url) as response:
        text = response.read().decode('utf-8')
        assert response.status == 200

    tumblr_form_key = extract_tumblr_form_key(text)
    headers = make_headers(consent_form_url, tumblr_form_key)
    # urllib labels any request body application/x-www-form-urlencoded unless
    # told otherwise; the payload is JSON, so say so (this is what requests'
    # `json=` argument does in fill_cookie_jar_requests).
    headers['Content-Type'] = 'application/json'
    data = json.dumps(make_json_data()).encode('utf-8')

    # Attach the headers to this one request instead of extending
    # opener.addheaders, which would send the Referer and form key with every
    # later request made through the same opener.
    request = urllib.request.Request(CONSENT_FORM_SVC, data=data, headers=headers)
    with opener.open(request) as response:
        response.read()
        assert response.status == 200
def parse_urllib_for_cookies(url, consent_form_url):
    """Get consent cookies with urllib, then let feedparser fetch *url*.

    The cookie processor that handled the consent requests is passed to
    feedparser as a handler. Prints the cookie jar contents and returns the
    feedparser result.
    """
    processor = urllib.request.HTTPCookieProcessor()
    fill_cookie_jar_urllib(urllib.request.build_opener(processor), consent_form_url)
    print(processor.cookiejar._cookies)
    return feedparser.parse(url, handlers=[processor])
def parse_urllib_only(url, consent_form_url):
    """Get consent cookies and fetch *url* with the same urllib opener.

    feedparser is only used to parse the downloaded text. Prints the cookie
    jar contents and returns the feedparser result.
    """
    processor = urllib.request.HTTPCookieProcessor()
    url_opener = urllib.request.build_opener(processor)
    fill_cookie_jar_urllib(url_opener, consent_form_url)
    print(processor.cookiejar._cookies)

    with url_opener.open(url) as feed_response:
        raw_feed = feed_response.read()
        assert feed_response.status == 200

    feed_text = raw_feed.decode('utf-8')
    return feedparser.parse(feed_text)
def parse_requests_for_cookies(url, consent_form_url):
    """Get consent cookies with requests, then let feedparser fetch *url*.

    The requests session is pointed at the cookie jar of a urllib cookie
    processor, and that processor is then passed to feedparser as a handler.
    Prints the cookie jar contents and returns the feedparser result.
    """
    processor = urllib.request.HTTPCookieProcessor()
    http = requests.Session()
    # Share one jar between the requests session and the urllib handler.
    http.cookies = processor.cookiejar
    fill_cookie_jar_requests(http, consent_form_url)
    print(processor.cookiejar._cookies)
    return feedparser.parse(url, handlers=[processor])
def parse_requests_only(url, consent_form_url):
    """Get consent cookies and fetch *url* with the same requests session.

    feedparser is only used to parse the downloaded bytes. Prints the cookie
    jar contents and returns the feedparser result.
    """
    http = requests.Session()
    fill_cookie_jar_requests(http, consent_form_url)
    print(http.cookies._cookies)

    feed_response = http.get(url)
    assert feed_response.status_code == 200
    raw_feed = feed_response.content
    return feedparser.parse(raw_feed)
# Try the feed directly first and show what title (if any) comes back.
f = feedparser.parse(url)
print('initial', f.feed.get('title'))

# Only run the experiments when the direct fetch failed because of a redirect
# to the tumblr consent form. 'bozo_exception' is looked up with .get()
# because feedparser leaves the key out when a feed parses cleanly, and
# attribute access on a missing key raises AttributeError.
if (
    f.get('bozo_exception')
    and f.get('status') == 302
    and f.get('href', '').startswith('https://www.tumblr.com/privacy/consent')
):
    print('have tumblr redirect')
    print()

    # The URL the feed request ended up at is the consent form page.
    consent_form_url = f.href

    things = [
        parse_urllib_for_cookies,
        parse_urllib_only,
        parse_requests_for_cookies,
        parse_requests_only,
    ]

    # Run each strategy and print the feed title it managed to get.
    for thing in things:
        f = thing(url, consent_form_url)
        print(thing.__name__, f.feed.get('title'))
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment