Skip to content

Instantly share code, notes, and snippets.

@enric1994
Created May 4, 2019 20:12
Show Gist options
  • Save enric1994/c51d50cd94bb22bb2cc5cd7457d55c4b to your computer and use it in GitHub Desktop.
Save enric1994/c51d50cd94bb22bb2cc5cd7457d55c4b to your computer and use it in GitHub Desktop.
Scraping the promoted Tweets from some political parties in Ireland
import csv
import io
import os
import sys
import time

from bs4 import BeautifulSoup
from selenium import webdriver
# Scrape the promoted tweets shown on Twitter's ads-transparency pages for a
# fixed list of Irish political parties and append them to a ';'-separated CSV.

PY2 = sys.version_info[0] == 2
if PY2:
    # Python 2 only: make UTF-8 the implicit codec so non-ASCII tweet text can
    # pass through the byte-oriented Python 2 csv module. Python 3 has neither
    # reload() as a builtin nor sys.setdefaultencoding(), and does not need it.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')

CSV_PATH = 'twitter.csv'
SNAPSHOT_PATH = 'page.html'
TRANSPARENCY_URL = 'https://ads.twitter.com/transparency/'
# Fixed pause that gives the JavaScript-rendered page time to load.
PAGE_LOAD_SECONDS = 3
CSV_HEADER = ['username', 'tweet', 'retweets', 'likes', 'date', 'fullname']
PARTIES = [
    "FineGael",
    "fiannafailparty",
    "sinnfeinireland",
    "labour",
    "pb4p",
]


def _open_csv_for_append(path):
    """Open *path* for appending in the mode the csv module expects.

    Python 2's csv module wants a binary file; Python 3's wants a text file
    opened with ``newline=''`` so the writer controls line endings itself.
    """
    if PY2:
        return open(path, 'ab')
    return open(path, 'a', newline='', encoding='utf-8')


def _text_at(elements, index):
    """Return the text of ``elements[index]``, or '' if that element is absent."""
    if index < len(elements):
        return elements[index].get_text()
    return ''


def fetch_party_page(driver, party):
    """Load a party's transparency page and return it parsed.

    The rendered HTML is also saved to ``page.html`` (overwritten on every
    call), as the original script did, so the last page can be inspected.
    """
    driver.get(TRANSPARENCY_URL + party)
    time.sleep(PAGE_LOAD_SECONDS)
    html_source = driver.page_source
    with io.open(SNAPSHOT_PATH, 'w', encoding='utf-8') as snapshot:
        snapshot.write(html_source)
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed; parse the in-memory string directly
    # instead of re-reading the snapshot from disk.
    return BeautifulSoup(html_source, 'html.parser')


def extract_rows(soup, party):
    """Yield one CSV row per tweet found in *soup*.

    Each row is ``[party, text, retweets, likes, timestamp, fullname]``.
    Fields whose markup is missing are emitted as empty strings instead of
    aborting the whole run with an IndexError.
    """
    titles = soup.find_all(
        "span", class_="src-components-UserNameTitle-styles-module--name")
    fullname = _text_at(titles, 0)
    if not titles:
        sys.stderr.write(
            'warning: no account name found for %s '
            '(page not loaded or layout changed)\n' % party)
    print(fullname)

    for tweet in soup.find_all("div", class_="Tweet--web"):
        timestamp = _text_at(tweet.find_all("a", class_="Tweet-timestamp"), 0)
        print(timestamp)

        text = _text_at(tweet.find_all("div", class_="Tweet-text"), 0)
        print(text)

        # Index 0 is skipped, exactly as in the original script (presumably
        # the reply count - verify against the live markup).
        counts = tweet.find_all("span", class_="Tweet-actionCount")
        retweets = _text_at(counts, 1)
        likes = _text_at(counts, 2)
        print(retweets)
        print(likes)
        print("--------------")

        yield [party, text, retweets, likes, timestamp, fullname]


def main():
    """Scrape every party in PARTIES and append the rows to twitter.csv."""
    # Only emit the header for a new/empty file; appending it on every run
    # would scatter header rows through the data.
    needs_header = (not os.path.isfile(CSV_PATH)
                    or os.path.getsize(CSV_PATH) == 0)

    with _open_csv_for_append(CSV_PATH) as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=';',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
        if needs_header:
            spamwriter.writerow(CSV_HEADER)

        driver = webdriver.Chrome()
        try:
            for party in PARTIES:
                print(party)
                soup = fetch_party_page(driver, party)
                for row in extract_rows(soup, party):
                    spamwriter.writerow(row)
        finally:
            # Always shut the browser down, even if scraping fails midway.
            driver.quit()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment