# TimTaylor/scrape.R

## scrape.R
##
## Scrape the two-column table (name, case count) from the gov.uk
## "COVID-19: number of cases in England" publication page and save it as CSV.
##
## Side effects:
##   * prints the text matched by the overview selector to the console;
##   * writes the table to `filename`, overwriting any existing file.
##
## The script stops with an explicit error when the scraped cells or headings
## do not have the expected two-column shape, so that an existing CSV is not
## replaced by a malformed or empty one.
library(rvest)

# parameters
url <- paste0(
  "https://www.gov.uk/government/publications/",
  "coronavirus-covid-19-number-of-cases-in-england/",
  "coronavirus-covid-19-number-of-cases-in-england"
)
filename <- "~/cases.csv"

# get data
html <- read_html(url)
overview <- html_text(
  html_nodes(html, "#contents p:nth-child(1)"),
  trim = TRUE
)
headings <- html_text(html_nodes(html, "th"), trim = TRUE)
content <- html_text(html_nodes(html, "td"), trim = TRUE)

# The cells are read row by row, so a two-column table yields an even number
# of cells alternating between the first and second column.
if (length(content) == 0L || length(content) %% 2L != 0L) {
  stop(
    "Expected a non-empty, even number of table cells but found ",
    length(content), "; the page layout may have changed.",
    call. = FALSE
  )
}
if (length(headings) != 2L) {
  stop(
    "Expected exactly 2 table headings but found ", length(headings),
    "; the page layout may have changed.",
    call. = FALSE
  )
}

authority <- content[c(TRUE, FALSE)]
# Drop thousands separators first: as.integer("1,234") would otherwise be NA.
cases <- as.integer(gsub(",", "", content[c(FALSE, TRUE)], fixed = TRUE))
table <- data.frame(authority, cases, stringsAsFactors = FALSE)
colnames(table) <- headings

# End the console output with a newline so the prompt starts on a fresh line.
cat(paste(overview, collapse = " "), "\n", sep = "")

# save table to file
write.csv(table, filename, row.names = FALSE)