Created
March 12, 2020 19:58
-
-
Save mok0/ca143184aee9d8525e0dc800d5d7ef73 to your computer and use it in GitHub Desktop.
Scrape the website of Danish Health Services SST to obtain their daily updated statistics about the covid19 epidemic.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3
# Scrape the website of the Danish Health Authority (SST) to obtain their
# daily updated statistics about the covid19 epidemic. The function
# `scrape` returns a dict with data from Denmark, the Faeroe Islands,
# global and EU+EEA+UK. It looks like this (2020-03-12):
#
# {'Danmark': (3038, 674, 0),
#  'Færøerne': (92, 2, 0),
#  'Globalt': (None, 125518, 4617),
#  'EU,EØS og UK': (None, 22105, 943)}
#
# Cells without known data contain None (basically the number of
# tested patients globally and in the EU).
#
# This software is distributed according to the GPL-3 license.
# See https://www.gnu.org/licenses/gpl-3.0-standalone.html
# (C) Morten Kjeldgaard, 2020.
from bs4 import BeautifulSoup | |
import requests | |
# SST Q&A page that carries the daily-updated covid19 statistics table.
url = "https://www.sst.dk/da/Viden/Smitsomme-sygdomme/Smitsomme-sygdomme-A-AA/Coronavirus/Spoergsmaal-og-svar"
def _to_int(cell):
    """Parse a Danish thousands-separated number ('3.038' -> 3038).

    Returns None when the cell is missing or not numeric, matching the
    documented contract that unknown cells contain None.
    """
    if cell is None:
        return None
    try:
        return int(cell.replace('.', ''))
    except ValueError:
        return None


def scrape():
    """Scrape the SST covid19 statistics table.

    Returns a dict mapping region name -> (tested, positive, dead),
    e.g. {'Danmark': (3038, 674, 0), ...}. Cells without known data
    are None.

    Raises requests.HTTPError on a failed HTTP request.
    """
    page = requests.get(url)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    table_responsive = soup.find(class_='table-responsive')
    table_body = table_responsive.find('tbody')

    data = []
    for row in table_body.find_all('tr'):
        cols = [e.text.strip() for e in row.find_all('td')]
        data.append([e for e in cols if e])  # get rid of empty values

    # Row 0 is the table header; rows 1-4 hold the four regions.
    # Convert each numeric cell to int (or None when absent/non-numeric).
    D = {}
    for row in data[1:5]:
        region = row[0]
        tested = _to_int(row[1]) if len(row) > 1 else None
        positive = _to_int(row[2]) if len(row) > 2 else None
        dead = _to_int(row[3]) if len(row) > 3 else None
        D[region] = (tested, positive, dead)
    return D
#. | |
if __name__ == "__main__":
    # Fetch today's figures and report the positive count for Denmark.
    stats = scrape()
    positive_dk = stats['Danmark'][1]
    print("Antal smittede i dag (SST):", positive_dk)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment