Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created March 25, 2022 01:31
Show Gist options
  • Save thoughtfulbloke/b1ef93b6306e87b9f73fa5a1d11cb965 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/b1ef93b6306e87b9f73fa5a1d11cb965 to your computer and use it in GitHub Desktop.
library(rvest)
library(dplyr)
library(tidyr)
library(lubridate)
library(stringr)
library(readr)
library(ggplot2)
library(ggthemes)
library(patchwork)
source("davidise.R")
six_cols <- colorblind_pal()(6)
smoothing <- 0.55
# saving reports from the website as html into a folder
# called news_item_html
MiQnews <- list.files("../news_item_html", pattern=".*html$")
extract_numbers <- function(x) {
file_w._path <- paste0("../news_item_html/",x)
report_date <- ymd(x)
li_items <- file_w._path %>%
read_html() %>%
html_nodes("li") %>%
html_text() %>% grep(pattern="Cases in hospital", value=TRUE, ignore.case = TRUE)
p_items <- file_w._path %>%
read_html() %>%
html_nodes("p") %>%
html_text() %>% grep(pattern="Cases in hospital", value=TRUE, ignore.case = TRUE)
if(length(li_items) == 0){li_items <- p_items}
if(is.null(li_items)){return(NULL)}
words <- unlist(strsplit(gsub("&nbsp;"," ",li_items)," "))
if(length(words) < 3){return(NULL)}
report <- data.frame(Date =rep(report_date, length=length(words)),
words, stringsAsFactors = FALSE) %>%
mutate(lag2= lag(words,2),
before = lag(words),
Numeric = as.numeric(gsub("[^1234567890]","",words))) %>%
filter(!is.na(Numeric))
return(report)
}
list_of_reports <- lapply(MiQnews, extract_numbers)
#the case when gets added to most days as the comms folk pick new
#ways of writing the DHBs
over_time <- bind_rows(list_of_reports) %>%
mutate(before = gsub("[;:,]","", before)) %>%
select(-words) %>%
filter(!before %in% c("27","32")) %>%
mutate(where = gsub("^[0123456789;, ]+","",paste(lag2, before)),
Location = case_when(where == "in hospital" ~ "Total",
where == "Auckland" ~ "Auckland",
where == "Tauranga" ~ "Bay of Plenty",
where == "Waikato" ~ "Waikato",
where == "Rotorua" ~ "Lakes",
where == "Hawkes Bay" ~ "Hawke's Bay",
where == "Christchurch" ~ "Canterbury",
where == "(Total Number" ~ "Total",
where == "infections): Whangārei" ~ "Northland",
where == "total number" ~ "Total",
where == "MidCentral" ~ "MidCentral",
where == "Tairāwhiti" ~ "Tairāwhiti",
where == "Hutt Valley" ~ "Hutt Valley",
where == "Southern" ~ "Southern",
where == "BOP" ~ "Bay of Plenty",
where == "Whanganui" ~ "Whanganui",
where == "Waitemata" ~ "Waitematā",
where == "Timaru" ~ "South Canterbury",
where == "North Shore" ~ "Waitematā",
where == "Middlemore" ~ "Counties Manukau",
where == "Lakes" ~ "Lakes",
where == "Northland" ~ "Northland",
where == "Hawke’s Bay" ~ "Hawke's Bay",
where == "Wellington" ~ "Capital & Coast",
where == "of Plenty" ~ "Bay of Plenty",
where == "Total Number" ~ "Total",
where == ": Whangārei" ~ "Northland",
where == "Tairawhiti" ~ "Tairāwhiti",
where == ": Northland" ~ "Northland",
where == "Canterbury" ~ "Canterbury",
where == "and Coast" ~ "Capital & Coast",
where == "Taranaki" ~ "Taranaki",
where == "Nelson Marlborough" ~ "Nelson Marlborough",
where == "Wairarapa" ~ "Wairarapa",
where == "Counties Manukau" ~ "Counties Manukau",
where == "Waitematā" ~ "Waitematā",
where == "South Canterbury" ~ "South Canterbury",
where == "Capital & Coast" ~ "Capital & Coast",
where == "West Coast" ~ "West Coast")) %>%
select(Date, Location, Numeric) %>%
spread(key=Location, value=Numeric, fill=0) %>%
gather(key="DHB", value="Hospitalisations", -Date) %>%
filter(DHB != "Total")
unique_DHBs <- unique(over_time$DHB)
#########################
DHBSet <- unique_DHBs[1:5]
nz1 <- over_time %>% filter(DHB %in% DHBSet)
grf1 <- ggplot(nz1, aes(x=Date,y=Hospitalisations)) +
geom_line(aes(colour=DHB,linetype=DHB)) +
geom_point(aes(shape=DHB, colour=DHB)) +
scale_colour_manual(values=six_cols[c(1:4,6)]) +
labs(title=paste(DHBSet[1],"to", DHBSet[5]),
y="Cases", x="") +
theme_minimal(base_family="OpenSans") +
theme(panel.grid = element_blank(),
axis.line.x = element_blank(),
axis.line.y = element_line(size=0.1),
axis.ticks.y = element_line(size=0.2),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
panel.background = element_rect(fill = "#FFFFFF", colour = "#FFFFFF"),
plot.background = element_rect(fill = "#FCFCFC"),
plot.caption = element_text(margin=margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
size=11, hjust=1),
plot.caption.position = "plot",
legend.position = "right",
panel.grid.major.y = element_line(color ="#BBBBBB", size = 0.1,linetype = 1))
grf1
DHBSet <- unique_DHBs[6:10]
nz1 <- over_time %>% filter(DHB %in% DHBSet)
grf2 <- ggplot(nz1, aes(x=Date,y=Hospitalisations)) +
geom_line(aes(colour=DHB,linetype=DHB)) +
geom_point(aes(shape=DHB, colour=DHB)) +
scale_colour_manual(values=six_cols[c(1:4,6)]) +
labs(title=paste(DHBSet[1],"to", DHBSet[5]),
y="Cases", x="") +
theme_minimal(base_family="OpenSans") +
theme(panel.grid = element_blank(),
axis.line.x = element_blank(),
axis.line.y = element_line(size=0.1),
axis.ticks.y = element_line(size=0.2),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
panel.background = element_rect(fill = "#FFFFFF", colour = "#FFFFFF"),
plot.background = element_rect(fill = "#FCFCFC"),
plot.caption = element_text(margin=margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
size=11, hjust=1),
plot.caption.position = "plot",
legend.position = "right",
panel.grid.major.y = element_line(color ="#BBBBBB", size = 0.1,linetype = 1))
grf2
DHBSet <- unique_DHBs[11:15]
nz1 <- over_time %>% filter(DHB %in% DHBSet)
grf3 <- ggplot(nz1, aes(x=Date,y=Hospitalisations)) +
geom_line(aes(colour=DHB,linetype=DHB)) +
geom_point(aes(shape=DHB, colour=DHB)) +
scale_colour_manual(values=six_cols[c(1:4,6)]) +
labs(title=paste(DHBSet[1],"to", DHBSet[5]),
y="Cases", x="") +
theme_minimal(base_family="OpenSans") +
theme(panel.grid = element_blank(),
axis.line.x = element_blank(),
axis.line.y = element_line(size=0.1),
axis.ticks.y = element_line(size=0.2),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
panel.background = element_rect(fill = "#FFFFFF", colour = "#FFFFFF"),
plot.background = element_rect(fill = "#FCFCFC"),
plot.caption = element_text(margin=margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
size=11, hjust=1),
plot.caption.position = "plot",
legend.position = "right",
panel.grid.major.y = element_line(color ="#BBBBBB", size = 0.1,linetype = 1))
grf3
DHBSet <- unique_DHBs[16:length(unique_DHBs)]
nz1 <- over_time %>% filter(DHB %in% DHBSet)
grf4 <- ggplot(nz1, aes(x=Date,y=Hospitalisations)) +
geom_line(aes(colour=DHB,linetype=DHB)) +
geom_point(aes(shape=DHB, colour=DHB)) +
scale_colour_manual(values=six_cols[c(1:4,6)]) +
labs(title=paste(DHBSet[1],"to", DHBSet[length(DHBSet)]),
y="Cases", x="") +
theme_minimal(base_family="OpenSans") +
theme(panel.grid = element_blank(),
axis.line.x = element_blank(),
axis.line.y = element_line(size=0.1),
axis.ticks.y = element_line(size=0.2),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
panel.background = element_rect(fill = "#FFFFFF", colour = "#FFFFFF"),
plot.background = element_rect(fill = "#FCFCFC"),
plot.caption = element_text(margin=margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
size=11, hjust=1),
plot.caption.position = "plot",
legend.position = "right",
panel.grid.major.y = element_line(color ="#BBBBBB", size = 0.1,linetype = 1))
grf4
grfall <- grf1 + grf2 + grf3 + grf4 + plot_layout(ncol=2) +
plot_annotation(
title = "Number of Hospitalisations for each DHB",
caption = "@thoughtfulnz Source: NZ MoH 1pm News Items"
)
ggsave(filename="~/Desktop/DHB_hospitalisations.png", plot=grfall,dpi=72, units="in",
bg="white", height = 5.556 * 1.6, width=9.877* 1.6)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment