Skip to content

Instantly share code, notes, and snippets.

@mok0
Created March 12, 2020 19:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mok0/ca143184aee9d8525e0dc800d5d7ef73 to your computer and use it in GitHub Desktop.
Save mok0/ca143184aee9d8525e0dc800d5d7ef73 to your computer and use it in GitHub Desktop.
Scrape the website of Danish Health Services SST to obtain their daily updated statistics about the covid19 epidemic.
#! /usr/bin/env python3
# Scrape the website of Danish Health Services SST to obtain their
# daily updated statistics about the covid19 epidemic. The function
# `scrape` returns a dict with data from Denmark, Faeroe Islands,
# global and EU+EEA+UK. It looks like this (2020-03-12):
#
#{'Danmark': (3038, 674, 0),
# 'Færøerne': (92, 2, 0),
# 'Globalt': (None, 125518, 4617),
# 'EU,EØS og UK': (None, 22105, 943)}
#
# Cells without known data contain None (basically the number of
# tested patients globally and in the EU).
#
# This software is distributed according to the GPL-3 license.
# See https://www.gnu.org/licenses/gpl-3.0-standalone.html
# (C) Morten Kjeldgaard, 2020.
from bs4 import BeautifulSoup
import requests
# Address of the SST web page that `scrape` downloads and parses.
url = "https://www.sst.dk/da/Viden/Smitsomme-sygdomme/Smitsomme-sygdomme-A-AA/Coronavirus/Spoergsmaal-og-svar"
def _parse_count(text):
    """Convert a scraped count such as ``'125.518'`` to an int.

    Dots are removed before conversion (the figures on the page use
    them as thousands separators).  Raises ``ValueError`` when the
    remaining text is not an integer.
    """
    return int(text.replace('.', ''))


def scrape(timeout=30):
    """Download the SST statistics page and return its table as a dict.

    Args:
        timeout: Seconds to wait for the server, passed on to
            ``requests.get``.  Without a timeout a stalled connection
            would block the caller forever.

    Returns:
        A dict mapping the text of the first cell of table rows 1-4 to
        a tuple ``(tested, positive, dead)``.  ``positive`` and ``dead``
        are ints; ``tested`` is an int, or ``None`` when that cell does
        not hold a number.

    Raises:
        requests.RequestException: on connection problems, timeout, or
            an HTTP error status.
        AttributeError: if the page has no element with class
            ``table-responsive`` or that element has no ``tbody``.
        IndexError: if the table has fewer than five rows, or one of
            rows 1-4 has fewer than four non-empty cells.
        ValueError: if a ``positive`` or ``dead`` cell is not a number.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    table_body = soup.find(class_='table-responsive').find('tbody')
    data = []
    for row in table_body.find_all('tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        # Get rid of empty values.
        # NOTE(review): dropping empty cells shifts the remaining columns
        # to the left; this relies on "unknown" cells holding some
        # non-empty placeholder text -- confirm against the live page.
        data.append([cell for cell in cells if cell])
    #.

    # The cell texts are now in `data`; convert the counts to ints and
    # store them in a dict for the user.  Row 0 is skipped.
    D = {}
    for j in range(1, 5):
        region, tested_text, positive_text, dead_text = data[j][:4]
        if len(data[j]) < 4:
            # Unreachable in practice (the unpacking above already fails
            # with ValueError for short rows), kept only as a guard.
            raise IndexError("list index out of range")
        try:
            tested = _parse_count(tested_text)
        except ValueError:
            # Number of tested persons is not reported for this region.
            tested = None
        positive = _parse_count(positive_text)
        dead = _parse_count(dead_text)
        D[region] = (tested, positive, dead)
    #.
    return D
#.
# Script entry point: fetch the current figures and print the number of
# confirmed cases reported for Denmark.
if __name__ == "__main__":
    stats = scrape()
    # Each value is a (tested, positive, dead) tuple; index 1 is `positive`.
    positive_in_denmark = stats['Danmark'][1]
    print("Antal smittede i dag (SST):", positive_in_denmark)
#.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment