Created April 5, 2020 03:27
title: "COVID-19 Worldometer data"
output: html_notebook
This notebook pulls a table with useful information out of [Worldometer](https://www.worldometers.info/coronavirus/) and then makes a graph.
# load GDP data from the world bank
# Download 2018 GDP per capita (World Bank indicator NY.GDP.PCAP.CD, exposed
# here as `gdppc`) for all countries. `extra = TRUE` asks WDI for its
# additional metadata columns; `iso3c` and `latitude` are used below.
wb <- WDI(
  indicator = c('gdppc' = "NY.GDP.PCAP.CD"),
  country = 'all',
  start = 2018,
  end = 2018,
  extra = TRUE
) %>%
  mutate(
    # Go through character first so the conversion is also correct if
    # `latitude` arrives as a factor rather than as text.
    lat_degrees = as.numeric(as.character(latitude)),
    # Distance from the equator, using roughly 111 km per degree of latitude.
    equator_km = 111 * abs(lat_degrees)
  ) %>%
  # Keep only the join key and the covariates; this drops the helper column.
  select(iso3c, gdppc, equator_km, latitude)
#load html data
# Scrape the per-country summary table from Worldometer, clean it into numeric
# columns, attach ISO3 country codes and merge in the World Bank covariates.
#
# NOTE(review): the URL and the countrycode origin scheme were empty strings in
# the original (both calls fail with ""). They are restored here to the
# Worldometer coronavirus page and to "country.name" -- confirm both.
dat <- read_html("https://www.worldometers.info/coronavirus/") %>%
  html_node("#main_table_countries_today") %>%
  html_table() %>%
  rename(
    Test_1m = "Tests/1M pop",
    Country = "Country,Other"
  ) %>%
  # Drop the aggregate rows so only individual countries/territories remain.
  filter(Country != "World", Country != "Total:") %>%
  # Turn blank cells into NA. na_if() only accepts vectors (dplyr >= 1.1.0
  # rejects a whole data frame), so apply it column by column, and only to
  # character columns since "" cannot be compared against numeric ones.
  mutate(across(where(is.character), ~na_if(., ""))) %>%
  # The daily "New..." columns are not used in this analysis.
  select(-contains("New")) %>%
  # Counts are formatted with thousands separators ("1,234"); strip the commas
  # and convert every column except the country name to numeric.
  mutate(across(-Country, ~as.numeric(str_remove_all(., ",")))) %>%
  # Translate English country names to ISO3 codes, the key shared with `wb`.
  # Names that countrycode cannot match become NA (with a warning).
  mutate(iso3c = countrycode(Country, "country.name", "iso3c")) %>%
  left_join(wb, by = "iso3c")
# Graph of test vs cases
# Scatter plot of total tests against total cases, both on log2 axes.
# The plot is stored in a variable so that exactly this plot can be handed to
# ggsave(). Adding ggsave() to the plot with `+` is not supported: it evaluates
# ggsave() first (saving whatever last_plot() was) and then tries to add its
# return value to the plot, which errors in current ggplot2.
tests_plot <- dat %>%
  ggplot(aes(x = TotalTests, y = TotalCases)) +
  geom_point() +
  # geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(trans = "log2", breaks = 2^seq(1, 20, 2)) +
  scale_x_continuous(trans = "log2", breaks = 2^seq(1, 20, 2)) +
  theme_minimal() +
  labs(
    x = "Total Number of Tests",
    y = "Total Number of Cases",
    title = "Cross-national COVID-19 case and test counts correlate closely",
    # NOTE(review): the original caption ended after the colon; the source URL
    # is restored here -- confirm it is the intended one.
    caption = "Data from Worldometer: https://www.worldometers.info/coronavirus/"
  )

# Display the plot in the notebook, then write it to disk.
tests_plot
ggsave("Tests.png", plot = tests_plot, width = 8, height = 5)
# Linear regression models
# Fit two linear models of total cases on total tests: a bivariate one, and one
# that also controls for log GDP per capita and distance from the equator.
# The list names become the column headers of the summary table.
models <- list(
  Bivariate = lm(TotalCases ~ TotalTests, data = dat),
  Controls = lm(
    TotalCases ~ TotalTests + I(log(gdppc)) + equator_km,
    data = dat
  )
)

# Display labels for the model terms. Names must match the coefficient names
# exactly (hence the literal 'I(log(gdppc))'); the order here is the row order
# passed to `coef_map`.
cm <- c(
  'TotalTests' = '# of Tests',
  'I(log(gdppc))' = 'log(GDP/cap)',
  'equator_km' = 'Distance from equator',
  '(Intercept)' = 'Constant'
)

# Render both models side by side in one regression table.
msummary(
  models,
  coef_map = cm,
  title = 'Dependent variable: Total number of covid-19 cases.'
)
