# Install the data package once if needed:
# devtools::install_github("RamiKrispin/coronavirus")
library(coronavirus)
library(dplyr)
library(ggplot2)
library(tidyr)
library(rnaturalearth)
library(sf)

# get a world map
# Returned as an sf object; it supplies the CRS for the case points and is the
# polygon layer the case points are spatially joined against.
worldmap <- ne_countries(returnclass = "sf")
# Turn the coronavirus records into point features built from their Long/Lat
# columns. No rows are filtered here: every case type is kept.
coronavirus_sf <- coronavirus %>%
  # st_as_sf() assigns (does not reproject to) the world map's CRS
  # NOTE(review): assumes Long/Lat are already in that CRS -- confirm
  st_as_sf(coords = c("Long", "Lat"), crs = st_crs(worldmap)) %>%
  # factor copy of the date, kept alongside the original date column
  mutate(date_factor = as.factor(date))
# get geospatial info about where the coronavirus is:
# spatially join each case point to the world-map polygons so the map's
# attribute columns (e.g. subregion) are attached to every record
joined_corona <- st_join(coronavirus_sf, worldmap) %>%
  # the join leaves a suffixed `type.x` column (presumably because both inputs
  # carry a `type` column); restore the name `type` for the case type
  rename(type = type.x)
# One row per record with a separate column for each case type.
coronavirus_wide_sf <- joined_corona %>%
  # make wide for easier calculation of total # infected
  spread(type, cases) %>%
  # a record that lacks a given case type comes out of spread() as NA;
  # treat those as zero so the sums downstream are not poisoned by NA
  # (not sure why values_fill isn't working...)
  mutate(
    death = ifelse(is.na(death), 0, death),
    recovered = ifelse(is.na(recovered), 0, recovered),
    confirmed = ifelse(is.na(confirmed), 0, confirmed)
  )
# Per-country, per-date net case counts with a running total.
coronavirus_country_summary <- coronavirus_wide_sf %>%
  # net cases by country by date: confirmed minus deaths minus recovered
  group_by(Country.Region, date) %>%
  summarize(cases = sum(confirmed) - sum(death) - sum(recovered)) %>%
  ungroup() %>%
  # make sure every country is represented every date
  complete(Country.Region, date, fill = list(cases = 0)) %>%
  # sort by date first: the running sum below depends on this row order
  arrange(date, desc(cases)) %>%
  # running sum per country, plus the first date with a positive net count
  group_by(Country.Region) %>%
  mutate(
    total_cases = cumsum(cases),
    # NOTE(review): a country with no positive count makes min() run on an
    # empty vector (warning, non-finite result)
    first_case = min(date[which(cases > 0)])
  ) %>%
  ungroup() %>%
  # order countries by first appearance, then by size, and freeze that order
  # into the factor levels; reversed so the earliest country ends up at the
  # top when the factor is mapped to a y axis
  arrange(first_case, desc(total_cases)) %>%
  mutate(
    Country.Region = Country.Region %>%
      forcats::fct_inorder() %>%
      forcats::fct_rev()
  )
#plot by country ####
# Heat map with one tile per country per date, coloured by log10 of the
# running total. Totals that are not positive have no finite log10; na.value
# sets the colour used for missing fills.
ggplot(
  coronavirus_country_summary,
  aes(x = date, y = Country.Region, fill = log10(total_cases))
) +
  geom_tile(size = 0.2, colour = "white") +
  scale_fill_viridis_c(
    option = "A", na.value = "white",
    # the fill is on the log10 scale, so break k stands for 10^k cases
    # (the previous labels, (1:5)^10, showed 1, 1024, 59049, ...)
    breaks = 1:5,
    labels = format(10^(1:5), big.mark = ",", scientific = FALSE, trim = TRUE),
    guide = guide_colorbar("Total Cases")
  ) +
  theme_minimal() +
  xlab("") + ylab("") +
  ggtitle("Progression of Total # of Cases of Coronavirus") +
  theme(plot.title.position = "plot")
#let's do the same by subregion ####
# Per-subregion, per-date net case counts with a running total.
coronavirus_subregion_summary <- coronavirus_wide_sf %>%
  # net cases by subregion by date: confirmed minus deaths minus recovered
  group_by(subregion, date) %>%
  summarize(cases = sum(confirmed) - sum(death) - sum(recovered)) %>%
  ungroup() %>%
  # make sure every subregion is represented every date
  complete(subregion, date, fill = list(cases = 0)) %>%
  # sort by date first: the running sum below depends on this row order
  arrange(date, desc(cases)) %>%
  # running sum per subregion, plus the first date with a positive net count
  group_by(subregion) %>%
  mutate(
    total_cases = cumsum(cases),
    # NOTE(review): a subregion with no positive count makes min() run on an
    # empty vector (warning, non-finite result)
    first_case = min(date[which(cases > 0)])
  ) %>%
  ungroup() %>%
  # order subregions by first appearance, then by size, and freeze that order
  # into the factor levels; reversed so the earliest subregion ends up at the
  # top when the factor is mapped to a y axis
  arrange(first_case, desc(total_cases)) %>%
  mutate(
    subregion = subregion %>%
      forcats::fct_inorder() %>%
      forcats::fct_rev()
  ) %>%
  # drop records with no subregion
  # TODO: figure out what is here -- presumably points that matched no
  # world-map polygon in the spatial join
  filter(!is.na(subregion))
# Heat map with one tile per subregion per date, coloured by log10 of the
# running total. Totals that are not positive have no finite log10; na.value
# sets the colour used for missing fills.
ggplot(
  coronavirus_subregion_summary,
  aes(x = date, y = subregion, fill = log10(total_cases))
) +
  geom_tile(size = 0.2, colour = "white") +
  scale_fill_viridis_c(
    option = "A", na.value = "white",
    # the fill is on the log10 scale, so break k stands for 10^k cases
    # (the previous labels, (1:5)^10, showed 1, 1024, 59049, ...)
    breaks = 1:5,
    labels = format(10^(1:5), big.mark = ",", scientific = FALSE, trim = TRUE),
    guide = guide_colorbar("Total Cases")
  ) +
  theme_minimal() +
  xlab("") + ylab("") +
  ggtitle(
    "Total # of Cases of Coronavirus Over Time",
    subtitle = "subregions from naturalearth subregions\ncases = confirmed - deaths - recovered"
  ) +
  theme(plot.title.position = "plot")