dbrgn/twitterchain.py

## twitterchain.py
# -*- coding: utf-8 -*-
"""
Tracing the twitter chain, down the rabbit hole.

Dependencies:

  - requests
  - beautifulsoup4

"""
from __future__ import print_function, division, absolute_import, unicode_literals

import re
from datetime import datetime

import requests
from bs4 import BeautifulSoup


# Permalink of the tweet at which the chain walk starts.
START_URL = 'https://twitter.com/aendu/status/433586683615784960'


def inception(url):
    """
    Fetch one tweet page, print the tweet and return the next tweet URL.

    Args:
        url: URL of the tweet permalink page to request.

    Returns:
        The ``data-expanded-url`` of the first link in the tweet text that
        points to a twitter.com status page, or ``None`` when the chain
        cannot be followed any further (404 response, no permalink tweet in
        the page, or no link to another tweet).

    """
    # Request tweet page
    r = requests.get(url)
    if r.status_code == 404:
        print('TWEET DELETED, CHAIN BROKEN :(')
        return None

    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so parsing could differ between machines.
    soup = BeautifulSoup(r.text, 'html.parser')
    matches = soup.select('div.tweet.permalink-tweet')
    body = matches[0].find('p', class_='tweet-text') if matches else None
    if body is None:
        # Pages other than a 404 can still lack a permalink tweet; stop
        # cleanly instead of raising IndexError / AttributeError.
        print('TWEET NOT FOUND IN PAGE, CHAIN BROKEN :(')
        return None
    tweet = matches[0]

    # Parse out & print tweet info
    user = tweet.get('data-screen-name')
    timestamp = tweet.find('span', class_='js-relative-timestamp').get('data-time')
    # data-time is converted as epoch seconds; fromtimestamp() yields local time.
    dt = datetime.fromtimestamp(int(timestamp))
    print('{0} @{1}: {2}'.format(dt.isoformat().replace('T', ' '), user, body.text))

    # And we need to go deeper!
    for link in body.find_all('a'):
        # A separate name keeps the ``url`` parameter intact inside the loop.
        target = link.get('data-expanded-url')
        if not target:
            continue
        # Dots are escaped so only the literal twitter.com host (optionally
        # prefixed with www.) followed by a path containing "status" matches.
        if re.match(r'^https?://(www\.)?twitter\.com/.*status', target):
            return target
    return None


if __name__ == '__main__':
    # Walk the chain: each inception() call prints one tweet and hands back
    # the URL of the next one, or a falsy value once there is nothing left
    # to follow.
    hop = START_URL
    while True:
        if not hop:
            break
        hop = inception(hop)
	# -*- coding: utf-8 -*-
	"""
	Tracing the twitter chain, down the rabbit hole.

	Dependencies:

	- requests
	- beautifulsoup4

	"""
	from __future__ import print_function, division, absolute_import, unicode_literals

	import re
	from datetime import datetime

	import requests
	from bs4 import BeautifulSoup


	# NOTE(review): the tab-indented section this line belongs to appears to
	# repeat the script at the top of the file with its indentation lost -
	# presumably a paste artifact; confirm before relying on it.
	# Same permalink as the START_URL assignment earlier in the file.
	START_URL = 'https://twitter.com/aendu/status/433586683615784960'


	def inception(url):
		"""
		Fetch one tweet page, print the tweet and return the next tweet URL.

		Args:
			url: URL of the tweet permalink page to request.

		Returns:
			The ``data-expanded-url`` of the first link in the tweet text that
			points to a twitter.com status page, or ``None`` when the chain
			cannot be followed any further (404 response, no permalink tweet
			in the page, or no link to another tweet).

		"""
		# Request tweet page
		r = requests.get(url)
		if r.status_code == 404:
			print('TWEET DELETED, CHAIN BROKEN :(')
			return None

		# Name the parser explicitly: without it bs4 picks whichever parser is
		# installed (and warns), so parsing could differ between machines.
		soup = BeautifulSoup(r.text, 'html.parser')
		matches = soup.select('div.tweet.permalink-tweet')
		body = matches[0].find('p', class_='tweet-text') if matches else None
		if body is None:
			# Pages other than a 404 can still lack a permalink tweet; stop
			# cleanly instead of raising IndexError / AttributeError.
			print('TWEET NOT FOUND IN PAGE, CHAIN BROKEN :(')
			return None
		tweet = matches[0]

		# Parse out & print tweet info
		user = tweet.get('data-screen-name')
		timestamp = tweet.find('span', class_='js-relative-timestamp').get('data-time')
		# data-time is converted as epoch seconds; fromtimestamp() yields local time.
		dt = datetime.fromtimestamp(int(timestamp))
		print('{0} @{1}: {2}'.format(dt.isoformat().replace('T', ' '), user, body.text))

		# And we need to go deeper!
		for link in body.find_all('a'):
			# A separate name keeps the ``url`` parameter intact inside the loop.
			target = link.get('data-expanded-url')
			if not target:
				continue
			# The quantifiers are required: without them the pattern only
			# accepts a single character on either side of "status" and no
			# real status URL can match. Dots in the host are escaped so only
			# the literal twitter.com (optionally www.) matches.
			if re.match(r'^https?://(www\.)?twitter\.com/.*status', target):
				return target
		return None


	if __name__ == '__main__':
		# Follow the chain from START_URL: inception() prints one tweet per
		# call and returns the next tweet URL, or a falsy value when the
		# chain ends, which terminates the loop.
		url = START_URL
		while url:
			url = inception(url)