Skip to content

Instantly share code, notes, and snippets.

@jlopezper
Last active March 17, 2020 10:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jlopezper/ac628b22b9bad13b85a26333cd64d17a to your computer and use it in GitHub Desktop.
Save jlopezper/ac628b22b9bad13b85a26333cd64d17a to your computer and use it in GitHub Desktop.
# Ugly code for tracking Covid-19 in Spain
# load libraries
library(readr)
library(ggplot2)
library(tidyr)
library(dplyr)
library(grid)
library(gridExtra)
library(tabulizer)
# read data
# Confirmed-cases and deaths time series from the CSSEGISandData COVID-19
# repository; both files are read straight from their raw GitHub URLs.
url_confirmed <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
url_deaths <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
corona <- readr::read_csv(file = url_confirmed)
corona_d <- readr::read_csv(file = url_deaths)
#corona <- filter(corona, `Province/State` == 'United Kingdom')
#corona_d <- filter(corona_d, `Province/State` == 'United Kingdom')
# drop unused columns
# The same three columns are removed from both tables.
unused_cols <- c("Province/State", "Lat", "Long")
corona[unused_cols] <- NULL
corona_d[unused_cols] <- NULL
# from wide to long dataframe
# Keep only the rows for Spain, then turn every remaining (date) column into
# a (date, value) pair and parse the month/day/two-digit-year column names
# into Date objects.
corona <- corona %>%
  rename(country = `Country/Region`) %>%
  filter(country == "Spain") %>%
  pivot_longer(cols = -country, names_to = "date", values_to = "cases") %>%
  mutate(date = as.Date(date, format = "%m/%d/%y"))

# Same reshaping for the deaths table; the value column is named `deaths`.
corona_d <- corona_d %>%
  rename(country = `Country/Region`) %>%
  filter(country == "Spain") %>%
  pivot_longer(cols = -country, names_to = "date", values_to = "deaths") %>%
  mutate(date = as.Date(date, format = "%m/%d/%y"))
# set March 12 cases manually (non-updated data)
# Overwrite the cumulative cases and deaths recorded for that single date.
patch_date <- as.Date("2020-03-12")
is_patch_day_cases <- corona$date == patch_date
is_patch_day_deaths <- corona_d$date == patch_date
corona[is_patch_day_cases, ]$cases <- 3033
corona_d[is_patch_day_deaths, ]$deaths <- 84
# join cases and deaths, then create new cases variable
# One pipeline: attach deaths by country and date, derive the fatality rate
# (deaths divided by cases), and compute daily new cases as the difference
# between each row's cumulative count and the previous row's within a country.
corona <- corona %>%
  left_join(corona_d, by = c("country", "date")) %>%
  mutate(fatality_rate = deaths / cases) %>%
  group_by(country) %>%
  mutate(daily_cases = cases - lag(cases)) %>%
  ungroup()
# plots
# All three panels cover the same date window, so restrict the data once.
corona_recent <- corona %>%
  filter(date >= as.Date("2020-02-25"))

# Shared look for the three panels: minimal theme with a larger title.
panel_theme <- theme_minimal() +
  theme(plot.title = element_text(size = 15))

# p1: new cases per day.
p1 <- ggplot(corona_recent, aes(x = date, y = daily_cases)) +
  geom_point() +
  geom_line() +
  labs(title = "New Daily Cases", x = "Date", y = "Cases") +
  panel_theme

# p2: cumulative cases on a log10 axis, each point labelled with its value.
p2 <- ggplot(corona_recent, aes(x = date, y = cases, label = cases)) +
  scale_y_log10() +
  geom_text(nudge_y = .15) +
  geom_point() +
  geom_line() +
  labs(title = "Coronavirus Cases", x = "Date", y = "Cases (log scale)") +
  panel_theme

# p3: deaths / cases, with the y axis formatted as a percentage.
p3 <- ggplot(corona_recent, aes(x = date, y = fatality_rate)) +
  scale_y_continuous(labels = scales::percent) +
  geom_point() +
  geom_line() +
  labs(title = "Fatality rate", x = "Date", y = "Rate") +
  panel_theme

# Layout: cumulative cases across the top row, the other two panels side by
# side underneath, with a bold overall title.
bottom_row <- arrangeGrob(p1, p3, ncol = 2)
main_title <- textGrob("Coronavirus in Spain", gp = gpar(fontsize = 20, font = 2))
p5 <- grid.arrange(p2, bottom_row, nrow = 2, top = main_title)

ggsave(plot = p5, filename = "~/Descargas/covid_spain.png", width = 14, height = 10)
# Getting data for each CCAA
# Thanks to
# https://analisisydecision.es/seguimiento-del-coronavirus-en-espana-por-comunidad-autonoma-extraer-informacion-de-un-pdf-con-r/
#
# Download the report to a temporary file. download.file() returns a status
# code, not the path, so the path has to be kept in its own variable.
# NOTE(review): extra = "-k" makes curl skip TLS certificate verification.
pdf_file <- tempfile()
download_status <- download.file(
  url = 'https://www.mscbs.gob.es/profesionales/saludPublica/ccayes/alertasActual/nCov-China/documentos/Actualizacion_43_COVID-19.pdf',
  destfile = pdf_file,
  method = 'curl',
  extra = "-k"
)
if (download_status != 0) {
  unlink(pdf_file)
  stop("Download of the CCAA report failed (status ", download_status, ")",
       call. = FALSE)
}

# locate_areas() asks the user to mark the table region by hand; its result is
# not used below (the coordinates are hard-coded), so only run it when a user
# is actually present.
if (interactive()) {
  area <- locate_areas(pdf_file, pages = 2)
}

# Extract the table on page 2 from the fixed region given in `area`.
pdf_lista <- extract_tables(
  pdf_file,
  output = "data.frame",
  pages = c(2),
  area = list(
    c(355.7635, 103.6804, 685.7704, 507.7376)
  ),
  guess = FALSE,
  encoding = "UTF-8"
)

# Keep the first 19 rows and the three columns of interest.
datos_ccaa <-
  data.frame(pdf_lista[1])[1:19, c('CCAA', 'Total.casos', 'Fallecidos')]

# The PDF is no longer needed once the table has been extracted.
unlink(pdf_file)
## Canarias
# Download the Canarias report to a temporary file and fail early if the
# download did not succeed (download.file() returns 0 on success).
# NOTE(review): extra = "-k" makes curl skip TLS certificate verification.
temp <- tempfile()
download_status <- download.file(
  url = 'https://www3.gobiernodecanarias.org/sanidad/scs/content/dcb400c5-6504-11ea-9a8e-719d4b52bf6c/InformeCasosCOVID-19.pdf',
  destfile = temp,
  method = 'curl',
  extra = "-k"
)
if (download_status != 0) {
  unlink(temp)
  stop("Download of the Canarias report failed (status ", download_status, ")",
       call. = FALSE)
}

# locate_areas() asks the user to mark the table region by hand; its result is
# not used below (the coordinates are hard-coded), so only run it when a user
# is actually present.
if (interactive()) {
  area <- locate_areas(temp, pages = 1)
}

# Extract the table on page 1 from the fixed region given in `area`.
pdf_lista <- extract_tables(
  temp,
  output = "data.frame",
  pages = c(1),
  area = list(
    c(393.4206, 124.5549, 536.8005, 394.7037 )
  ),
  guess = FALSE,
  encoding = "UTF-8"
)

# The PDF is no longer needed once the table has been extracted.
unlink(temp)
# Age-group labels in display order; used as the ordered factor levels.
age_levels <- c(
  "10-19 años", "20-29 años", "30-39 años", "40-49 años", "50-59 años",
  "60-69 años", "70-79 años", "80-89 años", ">=90 años"
)

# Take the first extracted table, keep the age-group column (renamed to Edad)
# together with the Mujer and Hombre columns, and make Edad an ordered factor.
df <- data.frame(pdf_lista[[1]]) %>%
  select(Edad = Grupo.de.Edad, Mujer, Hombre) %>%
  mutate(Edad = factor(Edad, levels = age_levels, ordered = TRUE))
# One row per age group and gender, with the count in `Casos`.
canarias_long <- pivot_longer(
  df,
  cols = c('Mujer', 'Hombre'),
  names_to = 'Genero',
  values_to = 'Casos'
)

# Side-by-side bars per age group, one bar per gender, each labelled with its
# count above the bar.
p <- ggplot(canarias_long, aes(x = Edad, y = Casos, fill = Genero)) +
  geom_col(position = position_dodge()) +
  theme_minimal() +
  scale_fill_manual(values = c('#999999', '#E69F00')) +
  geom_text(
    aes(label = Casos),
    vjust = -1,
    color = "black",
    position = position_dodge(0.9),
    size = 3.5
  ) +
  labs(
    title = 'COVID-19 en Canarias',
    subtitle = 'Número de casos detectados por grupo de edad y género',
    fill = 'Género',
    caption = "Fuente: Consejería de Sanidad, Gobierno de Canarias\nActualización: 16/03/2020"
  ) +
  theme(
    plot.title = element_text(size = 20),
    plot.subtitle = element_text(size = 17),
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 13)
  )

ggsave(plot = p, filename = "~/Descargas/covid_canarias.png", width = 14, height = 10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment