@Sutto
Forked from Brittt94/Graded Exercise 3
Created April 26, 2020 11:04
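A short Python script that scrapes article titles and links from the WHO novel-coronavirus news page with urllib and BeautifulSoup, then writes them to a semicolon-separated CSV with pandas.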
from urllib.request import Request, urlopen
import ssl
from bs4 import BeautifulSoup
import pandas as pd
url = 'https://www.who.int/emergencies/diseases/novel-coronavirus-2019/media-resources/news'
#################################################
# Fetch the page, identifying as a regular browser via the User-Agent header
#################################################
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
req = Request(url, headers=headers)
context = ssl._create_unverified_context()  # disables certificate verification to avoid SSL errors on some setups
uClient = urlopen(req, context=context)
html = uClient.read()  # raw HTML is stored in the variable html
uClient.close()
#################################################
#################################################
soup = BeautifulSoup(html, 'html.parser')
alltext = soup.getText()  # full page text (kept for inspection; not used below)
maindiv = soup.find('div', class_='sf_colsIn col-md-10')  # main content column (not used below)
dataset = []
# Class names match the WHO page markup as of April 2020 and may have changed since
for blogpost in soup.find_all('div', class_='list-view--item highlight-widget--content'):
    title = blogpost.find('p').getText()
    link = blogpost.find('a')['href']  # renamed from url to avoid shadowing the page URL above
    dataset.append({
        'Title': title,
        'Url': link
    })
# At the end, when all information is gathered...
dataset = pd.DataFrame(dataset)  # convert the list of dictionaries into a DataFrame
dataset.to_csv('who-news.csv', sep=';', index=False)  # write the DataFrame to a CSV file
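
To sanity-check the output, the CSV can be read back with pandas. This is a minimal sketch, assuming the script above has already written who-news.csv; note that sep=';' must match the separator used in to_csv:

import pandas as pd

df = pd.read_csv('who-news.csv', sep=';')   # same semicolon separator as when writing
print(df.head())                            # first few scraped titles and links
print(len(df), 'articles scraped')          # total number of rows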