Instantly share code, notes, and snippets.

Embed
What would you like to do?
Really simple web-scraping Python script for the first tweets of Donald Trump, using Requests and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
def extract(url='https://twitter.com/realDonaldTrump', timeout=10):
    """Print the date and text of each tweet found on a profile page.

    Only the tweets present in the HTML returned by a single GET request
    are handled; no pagination or scrolling is performed.

    NOTE(review): the XPath selectors below depend on the exact class
    names used by the page markup -- confirm they still match the HTML
    that the site currently serves.

    Args:
        url: Address of the profile page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or
            the server answers with an HTTP error status.
    """
    # The context manager releases the session's pooled connections even
    # when the request raises.
    with requests.Session() as session:
        # Without a timeout, requests may block forever on a stalled server.
        response = session.get(url=url, timeout=timeout)
        # Fail loudly rather than silently parsing an error page.
        response.raise_for_status()

    # Parse the page source.
    page = html.fromstring(response.text)
    tweets = page.xpath(
        "//li[contains(@class, 'js-stream-item stream-item stream-item')]")

    for tweet in tweets:
        paragraphs = tweet.xpath(
            ".//p[contains(@class, 'TweetTextSize TweetTextSize--normal "
            "js-tweet-text tweet-text')]")
        if not paragraphs:
            continue

        # text_content() also gathers the text nested in child elements
        # (links, mentions, hashtags); taking only the first text node
        # would cut the tweet at the first such element.
        text = paragraphs[0].text_content()
        if not text:
            continue

        dates = tweet.xpath(".//small[@class='time']/a/@title")
        # An item without a timestamp link must not abort the whole run;
        # 'inconnue' ("unknown") keeps the output in the script's language.
        date = dates[0] if dates else 'inconnue'

        print('En date du: {}'.format(date))
        print('Texte: {}'.format(text))
        print('\n')
# run the function
extract()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment