Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Really simple Python web-scraping script for the first Tweets of Donald Trump, using Requests and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
def extract():
    """
    Print the date and text of the tweets on @realDonaldTrump's profile page.

    Fetches https://twitter.com/realDonaldTrump once, parses the returned
    HTML with lxml and, for every stream item that contains tweet text,
    prints its date (the ``title`` attribute of the timestamp link) followed
    by the first text node of the tweet paragraph. Only the tweets present
    in that single response are handled; nothing is returned.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # NOTE(review): these selectors target the server-rendered profile
    # markup; confirm the page still serves this structure.
    item_xpath = "//li[contains(@class, 'js-stream-item stream-item stream-item')]"
    text_xpath = (
        ".//p[contains(@class, 'TweetTextSize TweetTextSize--normal "
        "js-tweet-text tweet-text')]/text()"
    )
    date_xpath = ".//small[@class='time']/a/@title"

    # Fetch the page source. The context manager closes the session (and its
    # pooled connections) even if the request raises.
    with requests.Session() as session:
        response = session.get(
            url='https://twitter.com/realDonaldTrump',
            timeout=10,  # without a timeout the call can block forever
        )
        # Fail loudly on 4xx/5xx instead of silently parsing an error page.
        response.raise_for_status()
        source = response.text

    # Parse the page and walk over each tweet container.
    page = html.fromstring(source)
    for tweet in page.xpath(item_xpath):
        text = tweet.xpath(text_xpath)
        if not text:
            # Stream items without tweet text are skipped.
            continue
        date = tweet.xpath(date_xpath)
        # A tweet may lack the timestamp link; fall back to a placeholder
        # rather than raising IndexError on date[0].
        date_label = date[0] if date else 'inconnue'
        print('En date du: {}'.format(date_label))
        print('Texte: {}'.format(text[0]))
        print('\n')
# Run the scraper: calls extract() as soon as this file is executed.
extract()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment