Skip to content

Instantly share code, notes, and snippets.

@sushinoya
Last active November 4, 2020 18:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sushinoya/7f81be9f9326cefbe5acae1738de783b to your computer and use it in GitHub Desktop.
Save sushinoya/7f81be9f9326cefbe5acae1738de783b to your computer and use it in GitHub Desktop.
Fetch Tweet by Tweet ID
from bs4 import BeautifulSoup, NavigableString
from urllib.request import urlopen, Request
def fetch_tweet(tweet_id):
    """Fetch the text of a tweet by scraping its status page.

    Requests ``https://twitter.com/anyuser/status/<tweet_id>`` with a
    browser-like ``User-Agent`` header, parses the returned HTML with
    BeautifulSoup (``lxml`` parser) and extracts text from the first
    ``<div>`` whose class is ``tweet-text``.

    Args:
        tweet_id: Identifier interpolated into the status URL.

    Returns:
        The text of the div's element children joined with single spaces
        and stripped of surrounding whitespace, or ``None`` when the
        request fails or no ``tweet-text`` div is present in the page.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url = f"https://twitter.com/anyuser/status/{tweet_id}"
    agent = "Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)"
    try:
        html_response = urlopen(Request(url, headers={'User-Agent': agent}))
    except Exception:
        # Page no longer exists (or the request failed in some other way).
        # Best-effort lookup: any request error maps to None, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return None

    # Close the HTTP response as soon as the body has been read.
    with html_response:
        html_source = html_response.read().decode('utf-8')

    soup = BeautifulSoup(html_source, features="lxml")
    tweet_text_div = soup.find("div", {"class": "tweet-text"})

    # Twitter account is probably suspended or tweet deleted
    if tweet_text_div is None:
        return None

    # Only element children contribute text; bare text nodes are skipped.
    # NOTE(review): this drops any plain text sitting directly inside the
    # div -- presumably intentional for the markup being scraped; confirm.
    tweet_components = [
        child.text
        for child in tweet_text_div.children
        if not isinstance(child, NavigableString)
    ]
    return " ".join(tweet_components).strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment