Skip to content

Instantly share code, notes, and snippets.

@dbrgn
Created February 12, 2014 14:35
Show Gist options
  • Save dbrgn/8956630 to your computer and use it in GitHub Desktop.
Save dbrgn/8956630 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Tracing the twitter chain, down the rabbit hole.
Dependencies:
- requests
- beautifulsoup4
"""
from __future__ import print_function, division, absolute_import, unicode_literals
import re
from datetime import datetime
import requests
from bs4 import BeautifulSoup
# Permalink of the tweet where the chain walk begins; the script entry point
# passes it to inception() as the first URL.
START_URL = 'https://twitter.com/aendu/status/433586683615784960'
# Expanded link targets that point at a tweet on twitter.com (optionally with
# a "www." prefix). The dots are escaped and the host is anchored so that
# look-alike hosts such as "twitterfoo.example" are not followed.
_STATUS_URL_PATTERN = r'^https?://(www\.)?twitter\.com/.*status.*$'


def inception(url):
    """
    Print the tweet found at ``url`` and return the next link of the chain.

    The tweet permalink page is downloaded and scraped. The tweet's
    timestamp, author and text are printed on a single line. Afterwards the
    links inside the tweet text are scanned, and the first one whose
    expanded URL looks like a Twitter status URL is returned.

    :param url: URL of a tweet permalink page.
    :returns: URL of the next tweet in the chain, or ``None`` when the chain
        ends here (the page returned 404, no tweet could be located in the
        page, or the tweet text links to no other tweet).
    """
    # Request tweet page. Without a timeout requests would wait indefinitely
    # on a stalled connection.
    r = requests.get(url, timeout=30)
    if r.status_code == 404:
        print('TWEET DELETED, CHAIN BROKEN :(')
        return None

    # Name the parser explicitly: otherwise bs4 picks whichever parser
    # happens to be installed, so results can differ between machines.
    soup = BeautifulSoup(r.text, 'html.parser')
    tweets = soup.select('div.tweet.permalink-tweet')
    body = tweets[0].find('p', class_='tweet-text') if tweets else None
    if body is None:
        # Non-404 page without the expected markup (error page, login wall,
        # changed layout, ...): stop cleanly instead of crashing.
        print('NO TWEET FOUND, CHAIN BROKEN :(')
        return None
    tweet = tweets[0]

    # Parse out & print tweet info
    user = tweet.get('data-screen-name')
    stamp = tweet.find('span', class_='js-relative-timestamp')
    timestamp = stamp.get('data-time') if stamp is not None else None
    if timestamp:
        dt = datetime.fromtimestamp(int(timestamp))
        when = dt.isoformat().replace('T', ' ')
    else:
        # Still show the tweet when the timestamp element is absent.
        when = 'unknown time'
    print('{0} @{1}: {2}'.format(when, user, body.text))

    # And we need to go deeper!
    for link in body.find_all('a'):
        target = link.get('data-expanded-url')
        if target and re.match(_STATUS_URL_PATTERN, target):
            return target
    return None
if __name__ == '__main__':
    # Walk the chain link by link; inception() hands back a falsy value once
    # there is no further tweet to follow.
    next_url = START_URL
    while True:
        if not next_url:
            break
        next_url = inception(next_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment