Created
March 25, 2022 01:31
-
-
Save thoughtfulbloke/b1ef93b6306e87b9f73fa5a1d11cb965 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(dplyr) | |
library(tidyr) | |
library(lubridate) | |
library(stringr) | |
library(readr) | |
library(ggplot2) | |
library(ggthemes) | |
library(patchwork) | |
source("davidise.R") | |
six_cols <- colorblind_pal()(6) | |
smoothing <- 0.55 | |
# saving reports from the website as html into a folder | |
# called news_item_html | |
MiQnews <- list.files("../news_item_html", pattern=".*html$") | |
extract_numbers <- function(x) { | |
file_w._path <- paste0("../news_item_html/",x) | |
report_date <- ymd(x) | |
li_items <- file_w._path %>% | |
read_html() %>% | |
html_nodes("li") %>% | |
html_text() %>% grep(pattern="Cases in hospital", value=TRUE, ignore.case = TRUE) | |
p_items <- file_w._path %>% | |
read_html() %>% | |
html_nodes("p") %>% | |
html_text() %>% grep(pattern="Cases in hospital", value=TRUE, ignore.case = TRUE) | |
if(length(li_items) == 0){li_items <- p_items} | |
if(is.null(li_items)){return(NULL)} | |
words <- unlist(strsplit(gsub(" "," ",li_items)," ")) | |
if(length(words) < 3){return(NULL)} | |
report <- data.frame(Date =rep(report_date, length=length(words)), | |
words, stringsAsFactors = FALSE) %>% | |
mutate(lag2= lag(words,2), | |
before = lag(words), | |
Numeric = as.numeric(gsub("[^1234567890]","",words))) %>% | |
filter(!is.na(Numeric)) | |
return(report) | |
} | |
list_of_reports <- lapply(MiQnews, extract_numbers) | |
#the case when gets added to most days as the comms folk pick new | |
#ways of writing the DHBs | |
over_time <- bind_rows(list_of_reports) %>% | |
mutate(before = gsub("[;:,]","", before)) %>% | |
select(-words) %>% | |
filter(!before %in% c("27","32")) %>% | |
mutate(where = gsub("^[0123456789;, ]+","",paste(lag2, before)), | |
Location = case_when(where == "in hospital" ~ "Total", | |
where == "Auckland" ~ "Auckland", | |
where == "Tauranga" ~ "Bay of Plenty", | |
where == "Waikato" ~ "Waikato", | |
where == "Rotorua" ~ "Lakes", | |
where == "Hawkes Bay" ~ "Hawke's Bay", | |
where == "Christchurch" ~ "Canterbury", | |
where == "(Total Number" ~ "Total", | |
where == "infections): Whangārei" ~ "Northland", | |
where == "total number" ~ "Total", | |
where == "MidCentral" ~ "MidCentral", | |
where == "Tairāwhiti" ~ "Tairāwhiti", | |
where == "Hutt Valley" ~ "Hutt Valley", | |
where == "Southern" ~ "Southern", | |
where == "BOP" ~ "Bay of Plenty", | |
where == "Whanganui" ~ "Whanganui", | |
where == "Waitemata" ~ "Waitematā", | |
where == "Timaru" ~ "South Canterbury", | |
where == "North Shore" ~ "Waitematā", | |
where == "Middlemore" ~ "Counties Manukau", | |
where == "Lakes" ~ "Lakes", | |
where == "Northland" ~ "Northland", | |
where == "Hawke’s Bay" ~ "Hawke's Bay", | |
where == "Wellington" ~ "Capital & Coast", | |
where == "of Plenty" ~ "Bay of Plenty", | |
where == "Total Number" ~ "Total", | |
where == ": Whangārei" ~ "Northland", | |
where == "Tairawhiti" ~ "Tairāwhiti", | |
where == ": Northland" ~ "Northland", | |
where == "Canterbury" ~ "Canterbury", | |
where == "and Coast" ~ "Capital & Coast", | |
where == "Taranaki" ~ "Taranaki", | |
where == "Nelson Marlborough" ~ "Nelson Marlborough", | |
where == "Wairarapa" ~ "Wairarapa", | |
where == "Counties Manukau" ~ "Counties Manukau", | |
where == "Waitematā" ~ "Waitematā", | |
where == "South Canterbury" ~ "South Canterbury", | |
where == "Capital & Coast" ~ "Capital & Coast", | |
where == "West Coast" ~ "West Coast")) %>% | |
select(Date, Location, Numeric) %>% | |
spread(key=Location, value=Numeric, fill=0) %>% | |
gather(key="DHB", value="Hospitalisations", -Date) %>% | |
filter(DHB != "Total") | |
unique_DHBs <- unique(over_time$DHB) | |
######################### | |
DHBSet <- unique_DHBs[1:5] | |
nz1 <- over_time %>% filter(DHB %in% DHBSet) | |
grf1 <- ggplot(nz1, aes(x=Date,y=Hospitalisations)) + | |
geom_line(aes(colour=DHB,linetype=DHB)) + | |
geom_point(aes(shape=DHB, colour=DHB)) + | |
scale_colour_manual(values=six_cols[c(1:4,6)]) + | |
labs(title=paste(DHBSet[1],"to", DHBSet[5]), | |
y="Cases", x="") + | |
theme_minimal(base_family="OpenSans") + | |
theme(panel.grid = element_blank(), | |
axis.line.x = element_blank(), | |
axis.line.y = element_line(size=0.1), | |
axis.ticks.y = element_line(size=0.2), | |
axis.ticks.x = element_blank(), | |
axis.text.x = element_blank(), | |
panel.background = element_rect(fill = "#FFFFFF", colour = "#FFFFFF"), | |
plot.background = element_rect(fill = "#FCFCFC"), | |
plot.caption = element_text(margin=margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"), | |
size=11, hjust=1), | |
plot.caption.position = "plot", | |
legend.position = "right", | |
panel.grid.major.y = element_line(color ="#BBBBBB", size = 0.1,linetype = 1)) | |
grf1 | |
DHBSet <- unique_DHBs[6:10] | |
nz1 <- over_time %>% filter(DHB %in% DHBSet) | |
grf2 <- ggplot(nz1, aes(x=Date,y=Hospitalisations)) + | |
geom_line(aes(colour=DHB,linetype=DHB)) + | |
geom_point(aes(shape=DHB, colour=DHB)) + | |
scale_colour_manual(values=six_cols[c(1:4,6)]) + | |
labs(title=paste(DHBSet[1],"to", DHBSet[5]), | |
y="Cases", x="") + | |
theme_minimal(base_family="OpenSans") + | |
theme(panel.grid = element_blank(), | |
axis.line.x = element_blank(), | |
axis.line.y = element_line(size=0.1), | |
axis.ticks.y = element_line(size=0.2), | |
axis.ticks.x = element_blank(), | |
axis.text.x = element_blank(), | |
panel.background = element_rect(fill = "#FFFFFF", colour = "#FFFFFF"), | |
plot.background = element_rect(fill = "#FCFCFC"), | |
plot.caption = element_text(margin=margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"), | |
size=11, hjust=1), | |
plot.caption.position = "plot", | |
legend.position = "right", | |
panel.grid.major.y = element_line(color ="#BBBBBB", size = 0.1,linetype = 1)) | |
grf2 | |
DHBSet <- unique_DHBs[11:15] | |
nz1 <- over_time %>% filter(DHB %in% DHBSet) | |
grf3 <- ggplot(nz1, aes(x=Date,y=Hospitalisations)) + | |
geom_line(aes(colour=DHB,linetype=DHB)) + | |
geom_point(aes(shape=DHB, colour=DHB)) + | |
scale_colour_manual(values=six_cols[c(1:4,6)]) + | |
labs(title=paste(DHBSet[1],"to", DHBSet[5]), | |
y="Cases", x="") + | |
theme_minimal(base_family="OpenSans") + | |
theme(panel.grid = element_blank(), | |
axis.line.x = element_blank(), | |
axis.line.y = element_line(size=0.1), | |
axis.ticks.y = element_line(size=0.2), | |
axis.ticks.x = element_blank(), | |
axis.text.x = element_blank(), | |
panel.background = element_rect(fill = "#FFFFFF", colour = "#FFFFFF"), | |
plot.background = element_rect(fill = "#FCFCFC"), | |
plot.caption = element_text(margin=margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"), | |
size=11, hjust=1), | |
plot.caption.position = "plot", | |
legend.position = "right", | |
panel.grid.major.y = element_line(color ="#BBBBBB", size = 0.1,linetype = 1)) | |
grf3 | |
DHBSet <- unique_DHBs[16:length(unique_DHBs)] | |
nz1 <- over_time %>% filter(DHB %in% DHBSet) | |
grf4 <- ggplot(nz1, aes(x=Date,y=Hospitalisations)) + | |
geom_line(aes(colour=DHB,linetype=DHB)) + | |
geom_point(aes(shape=DHB, colour=DHB)) + | |
scale_colour_manual(values=six_cols[c(1:4,6)]) + | |
labs(title=paste(DHBSet[1],"to", DHBSet[length(DHBSet)]), | |
y="Cases", x="") + | |
theme_minimal(base_family="OpenSans") + | |
theme(panel.grid = element_blank(), | |
axis.line.x = element_blank(), | |
axis.line.y = element_line(size=0.1), | |
axis.ticks.y = element_line(size=0.2), | |
axis.ticks.x = element_blank(), | |
axis.text.x = element_blank(), | |
panel.background = element_rect(fill = "#FFFFFF", colour = "#FFFFFF"), | |
plot.background = element_rect(fill = "#FCFCFC"), | |
plot.caption = element_text(margin=margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"), | |
size=11, hjust=1), | |
plot.caption.position = "plot", | |
legend.position = "right", | |
panel.grid.major.y = element_line(color ="#BBBBBB", size = 0.1,linetype = 1)) | |
grf4 | |
grfall <- grf1 + grf2 + grf3 + grf4 + plot_layout(ncol=2) + | |
plot_annotation( | |
title = "Number of Hospitalisations for each DHB", | |
caption = "@thoughtfulnz Source: NZ MoH 1pm News Items" | |
) | |
ggsave(filename="~/Desktop/DHB_hospitalisations.png", plot=grfall,dpi=72, units="in", | |
bg="white", height = 5.556 * 1.6, width=9.877* 1.6) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment