Last active
March 17, 2020 10:26
-
-
Save jlopezper/ac628b22b9bad13b85a26333cd64d17a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ugly code for tracking Covid-19 in Spain
# load libraries | |
library(readr) | |
library(ggplot2) | |
library(tidyr) | |
library(dplyr) | |
library(grid) | |
library(gridExtra) | |
library(tabulizer) | |
# Read data ----
# Fetch the JHU CSSE confirmed-case and death time series. Both tables are
# wide: the identifying columns come first, followed by one column per day.
# NOTE(review): these are the "time_series_19-covid-*" file names in use when
# the script was written -- confirm that the URLs still resolve.
url_confirmed <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
url_deaths <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
corona <- read_csv(url_confirmed)
corona_d <- read_csv(url_deaths)

# Optional: restrict to a single province before the column is dropped.
# corona <- filter(corona, `Province/State` == 'United Kingdom')
# corona_d <- filter(corona_d, `Province/State` == 'United Kingdom')

# Keep everything except the province and coordinate columns.
corona <- corona[setdiff(names(corona), c("Province/State", "Lat", "Long"))]
corona_d <- corona_d[setdiff(names(corona_d), c("Province/State", "Lat", "Long"))]
# From wide to long ----
# For each table: name the country column, keep Spain only, stack the per-day
# columns into (date, value) rows, and parse the m/d/yy headers into Dates.
corona <-
  corona %>%
  rename(country = `Country/Region`) %>%
  filter(country %in% "Spain") %>%
  pivot_longer(cols = -country, names_to = "date", values_to = "cases") %>%
  mutate(date = as.Date(date, format = "%m/%d/%y"))

corona_d <-
  corona_d %>%
  rename(country = `Country/Region`) %>%
  filter(country %in% "Spain") %>%
  pivot_longer(cols = -country, names_to = "date", values_to = "deaths") %>%
  mutate(date = as.Date(date, format = "%m/%d/%y"))
# The upstream series had not been updated for 12 March, so that day's
# figures are set by hand.
corona$cases[corona$date == as.Date("2020-03-12")] <- 3033
corona_d$deaths[corona_d$date == as.Date("2020-03-12")] <- 84

# Attach deaths to cases, then derive:
#   fatality_rate = cumulative deaths / cumulative cases
#   daily_cases   = day-over-day change in cumulative cases (NA on the first
#                   day of each country, which has no previous value)
corona <-
  corona %>%
  left_join(corona_d, by = c("country", "date")) %>%
  mutate(fatality_rate = deaths / cases) %>%
  group_by(country) %>%
  mutate(daily_cases = cases - dplyr::lag(cases)) %>%
  ungroup()
# Plots ----
# Every panel shows the same window, starting on 25 February 2020.

# p1: new cases per day.
p1 <-
  ggplot(
    data = filter(corona, date >= "2020-02-25"),
    mapping = aes(x = date, y = daily_cases)
  ) +
  geom_point() +
  geom_line() +
  labs(title = "New Daily Cases", x = "Date", y = "Cases") +
  theme_minimal() +
  theme(plot.title = element_text(size = 15))

# p2: cumulative cases on a log10 axis, each point labelled with its value.
p2 <-
  ggplot(
    data = filter(corona, date >= "2020-02-25"),
    mapping = aes(x = date, y = cases, label = cases)
  ) +
  scale_y_log10() +
  geom_text(nudge_y = 0.15) +
  geom_point() +
  geom_line() +
  labs(title = "Coronavirus Cases", x = "Date", y = "Cases (log scale)") +
  theme_minimal() +
  theme(plot.title = element_text(size = 15))

# p3: cumulative deaths / cumulative cases, shown as a percentage.
p3 <-
  ggplot(
    data = filter(corona, date >= "2020-02-25"),
    mapping = aes(x = date, y = fatality_rate)
  ) +
  scale_y_continuous(labels = scales::percent) +
  geom_point() +
  geom_line() +
  labs(title = "Fatality rate", x = "Date", y = "Rate") +
  theme_minimal() +
  theme(plot.title = element_text(size = 15))

# Layout: cumulative cases across the top row, the other two panels side by
# side underneath, with a bold heading over the whole figure.
p5 <- grid.arrange(
  p2,
  arrangeGrob(p1, p3, ncol = 2),
  nrow = 2,
  top = textGrob("Coronavirus in Spain", gp = gpar(fontsize = 20, font = 2))
)

ggsave(
  filename = "~/Descargas/covid_spain.png",
  plot = p5,
  width = 14,
  height = 10
)
# Getting data for each CCAA ----
# Thanks to
# https://analisisydecision.es/seguimiento-del-coronavirus-en-espana-por-comunidad-autonoma-extraer-informacion-de-un-pdf-con-r/
#
# Downloads the Ministry of Health update PDF, reads the table found at fixed
# coordinates on page 2, and keeps the CCAA name, total cases and deaths.

# download.file() returns a status code, not a path, so the destination must
# be kept in its own variable for the later steps to find the file.
pdf_file <- tempfile(fileext = ".pdf")
# NOTE(review): `-k` tells curl to skip TLS certificate verification; confirm
# this is still required for this host.
download_status <- download.file(
  url = 'https://www.mscbs.gob.es/profesionales/saludPublica/ccayes/alertasActual/nCov-China/documentos/Actualizacion_43_COVID-19.pdf',
  destfile = pdf_file, method = 'curl', extra = "-k"
)
if (download_status != 0) {
  stop("Download of the CCAA report failed (status ", download_status, ")",
       call. = FALSE)
}

# locate_areas() opens an interactive selector. Its result is not used by
# the extraction below (the coordinates are hard-coded), so it is only run in
# interactive sessions, where it can be used to re-measure the table area.
if (interactive()) {
  area <- locate_areas(pdf_file, pages = 2)
}

pdf_lista <- extract_tables(
  pdf_file,
  output = "data.frame",
  pages = c(2),
  area = list(
    c(355.7635, 103.6804, 685.7704, 507.7376)
  ),
  guess = FALSE,  # use the explicit area instead of auto-detecting tables
  encoding = "UTF-8"
)

# Keep the first 19 rows of the extracted table.
# NOTE(review): presumably one row per autonomous community/city -- confirm
# against the PDF layout.
datos_ccaa <-
  data.frame(pdf_lista[[1]])[1:19, c('CCAA', 'Total.casos', 'Fallecidos')]

# Remove the downloaded PDF now that the table has been extracted.
unlink(pdf_file)
## Canarias ----
# Downloads the Canary Islands health service report, reads the age-by-sex
# table found at fixed coordinates on page 1, and saves a dodged bar chart of
# detected cases per age group and gender.
temp <- tempfile()
# NOTE(review): `-k` tells curl to skip TLS certificate verification; confirm
# this is still required for this host.
download_status <- download.file(
  url = 'https://www3.gobiernodecanarias.org/sanidad/scs/content/dcb400c5-6504-11ea-9a8e-719d4b52bf6c/InformeCasosCOVID-19.pdf',
  destfile = temp, method = 'curl', extra = "-k"
)
if (download_status != 0) {
  stop("Download of the Canarias report failed (status ", download_status, ")",
       call. = FALSE)
}

# locate_areas() opens an interactive selector. Its result is not used by
# the extraction below (the coordinates are hard-coded), so it is only run in
# interactive sessions, where it can be used to re-measure the table area.
if (interactive()) {
  area <- locate_areas(temp, pages = 1)
}

pdf_lista <- extract_tables(
  temp,
  output = "data.frame",
  pages = c(1),
  area = list(
    c(393.4206, 124.5549, 536.8005, 394.7037)
  ),
  guess = FALSE,  # use the explicit area instead of auto-detecting tables
  encoding = "UTF-8"
)

# The table is in memory now, so the downloaded PDF can be removed.
unlink(temp)

df <-
  data.frame(pdf_lista[[1]]) %>%
  rename(Edad = Grupo.de.Edad) %>%
  select(Edad, Mujer, Hombre)

# Order the age groups for the x axis. Any label not listed here becomes NA.
# NOTE(review): there is no level for ages below 10 -- confirm the report has
# no such row.
df$Edad <- factor(
  df$Edad,
  levels = c("10-19 años", "20-29 años", "30-39 años", "40-49 años",
             "50-59 años", "60-69 años", "70-79 años", "80-89 años",
             ">=90 años"),
  ordered = TRUE
)

p <-
  df %>%
  pivot_longer(cols = c('Mujer', 'Hombre'), names_to = 'Genero', values_to = 'Casos') %>%
  ggplot(aes(x = Edad, y = Casos, fill = Genero)) +
  # geom_col() draws bars whose heights are the values in the data.
  geom_col(position = position_dodge()) +
  theme_minimal() +
  scale_fill_manual(values = c('#999999', '#E69F00')) +
  # Print each count just above its bar, aligned with the dodged bars.
  geom_text(aes(label = Casos), vjust = -1, color = "black",
            position = position_dodge(0.9), size = 3.5) +
  labs(title = 'COVID-19 en Canarias',
       subtitle = 'Número de casos detectados por grupo de edad y género',
       fill = 'Género',
       caption = "Fuente: Consejería de Sanidad, Gobierno de Canarias\nActualización: 16/03/2020") +
  theme(plot.title = element_text(size = 20),
        plot.subtitle = element_text(size = 17),
        axis.text = element_text(size = 14),
        axis.title = element_text(size = 14),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13))

ggsave(plot = p, filename = "~/Descargas/covid_canarias.png", width = 14, height = 10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment