tonymugen/flxCovidTrack.R

## flxCovidTrack.R
#
# Copyright (c) 2020 Anthony J. Greenberg
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
#

library(data.table)
library(ggplot2)

#
# This script downloads New York State COVID-19 test data and plots various statistics.
# The plots are exported as HTML widgets for embedding on a website.
# Plotting and export require ggplot2, plotly, and htmlwidgets. Data wrangling requires data.table.
# Interactions with the New York State DOH API require RSocrata.
#

# Download the NYS data.

nysDT    <- as.data.table(RSocrata::read.socrata(
                        "https://health.data.ny.gov/resource/xdss-u53e.json",
                         stringsAsFactors=FALSE))
nysDT    <- nysDT[, test_date := as.Date(test_date)]
num.cols <- c("new_positives", "cumulative_number_of_positives",
              "total_number_of_tests", "cumulative_number_of_tests")
nysDT    <- nysDT[, (num.cols) := lapply(.SD, as.integer), .SDcols = num.cols]

#
# Extract the data for Tompkins and surrounding counties.
#
region <- data.table(county = c("Tompkins", "Tioga", "Chemung",
                              "Schuyler", "Seneca", "Cayuga",
                              "Cortland", "Broome", "Onondaga"),
                     population = c(103000, 49000, 86000, 18000, 35000,
                                  78000, 48000, 194000, 464000))
nysDT <- nysDT[county %in% region[, county],]
locDT <- nysDT[county != "Tompkins", lapply(.SD, sum), by = test_date,
             .SDcols = c("new_positives", "cumulative_number_of_positives",
                       "total_number_of_tests", "cumulative_number_of_tests")]
tomDT <- nysDT[county == "Tompkins",
               c("test_date", "new_positives", "cumulative_number_of_positives",
                 "total_number_of_tests", "cumulative_number_of_tests")]
#
# Separate the Tompkins and whole region data; calculate per capita statistics.
#
tomDT <- tomDT[, data := "Tompkins"]
locDT <- locDT[, data := "region"]
locDT <- rbind(tomDT, locDT)
locDT <- locDT[, population := rep(
                     c(region[county == "Tompkins", sum(population)],
                       region[county != "Tompkins", sum(population)]),
                     times = unlist(locDT[, .N, by = data][, 2]))]
locDT <- locDT[, pc_tests         := total_number_of_tests / population]
locDT <- locDT[, percent_positive := 100 * new_positives / total_number_of_tests]
locDT <- locDT[, pctPosMn := frollmean(percent_positive, 7, na.rm = TRUE)]

#
# Get Tompkins county hospitalization data and add to what I already have
#
prevData <- fread("../tcDOHactiveHosp.tsv")
orevData <- prevData[, date := as.Date(date)]
curlCmd  <- paste0("curl -s https://tompkinscountyny.gov/health#table",
                   " | grep -B 8 'END TABLE' | head -1 |",
                   " sed 's/\\s\\+//g' | sed 's/<\\/td>//'")
ahToday  <- system(curlCmd, intern = FALSE)
prevData <- rbind(prevData,
             data.table(date = Sys.Date(), active.hospitalizations = ahToday))
fwrite(prevData, file="../tcDOHactiveHosp.tsv", sep = "\t", quote = FALSE, na = "NA")
prevData <- prevData[,
                rlMean := frollmean(active.hospitalizations, 7, na.rm = TRUE)]
#
# Plot and export to HTML
#
ggp <- ggplot(data = locDT,
              aes(x = test_date, y = new_positives, fill = data)) +
       geom_col() +
       theme_classic(base_size = 18) +
       theme(legend.title = element_blank(), axis.title.x = element_blank()) +
       ylab("daily number of positives")
py  <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "positivesPlot.html")

ggp <- ggplot(data = locDT,
              aes(x = test_date, y = total_number_of_tests, fill = data)) +
       geom_col() +
       theme_classic(base_size = 18) +
       theme(legend.title = element_blank(), axis.title.x = element_blank()) +
       ylab("daily number of tests")
py  <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "testsPlot.html")

ggp <- ggplot(data = locDT, aes(x = test_date, y = pc_tests, fill = data)) +
       geom_col() +
       theme_classic(base_size = 18) +
       theme(legend.title = element_blank(), axis.title.x = element_blank()) +
       ylab("daily number of tests per capita")
py  <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "pcTestsPlot.html")

ggp <- ggplot(data = locDT,
              aes(x = test_date, y = percent_positive, color = data)) +
       geom_point(size = 1, alpha = 0.5) +
       geom_line(data = locDT,
                 aes(x = test_date, y = pctPosMn, color = data), size = 1.2) +
       theme_classic(base_size = 18) +
       theme(legend.title = element_blank(), axis.title.x = element_blank()) +
       ylab("% positive")
py  <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "pctPositivePlot.html")

ggp <- ggplot(data = prevData, aes(x = date, y = active.hospitalizations)) +
       geom_col(fill = "grey60") +
       theme_classic(base_size = 18) +
       ylab("number of people hospitalized")
py  <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "tcDOHhosp.html")
	#
	# Copyright (c) 2020 Anthony J. Greenberg
	#
	# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
	#
	# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
	#
	# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
	#
	# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
	# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
	# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
	# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	# THE POSSIBILITY OF SUCH DAMAGE.
	#

	library(data.table)
	library(ggplot2)

	#
	# This script downloads New York State COVID-19 test data and plots various statistics.
	# The plots are exported as HTML widgets for embedding on a website.
	# Plotting and export require ggplot2, plotly, and htmlwidgets. Data wrangling requires data.table.
	# Interactions with the New York State DOH API require RSocrata.
	#

	# Download the NYS data.

	nysDT <- as.data.table(RSocrata::read.socrata(
	"https://health.data.ny.gov/resource/xdss-u53e.json",
	stringsAsFactors=FALSE))
	nysDT <- nysDT[, test_date := as.Date(test_date)]
	num.cols <- c("new_positives", "cumulative_number_of_positives",
	"total_number_of_tests", "cumulative_number_of_tests")
	nysDT <- nysDT[, (num.cols) := lapply(.SD, as.integer), .SDcols = num.cols]

	#
	# Extract the data for Tompkins and surrounding counties.
	#
	region <- data.table(county = c("Tompkins", "Tioga", "Chemung",
	"Schuyler", "Seneca", "Cayuga",
	"Cortland", "Broome", "Onondaga"),
	population = c(103000, 49000, 86000, 18000, 35000,
	78000, 48000, 194000, 464000))
	nysDT <- nysDT[county %in% region[, county],]
	locDT <- nysDT[county != "Tompkins", lapply(.SD, sum), by = test_date,
	.SDcols = c("new_positives", "cumulative_number_of_positives",
	"total_number_of_tests", "cumulative_number_of_tests")]
	tomDT <- nysDT[county == "Tompkins",
	c("test_date", "new_positives", "cumulative_number_of_positives",
	"total_number_of_tests", "cumulative_number_of_tests")]
	#
	# Separate the Tompkins and whole region data; calculate per capita statistics.
	#
	tomDT <- tomDT[, data := "Tompkins"]
	locDT <- locDT[, data := "region"]
	locDT <- rbind(tomDT, locDT)
	locDT <- locDT[, population := rep(
	c(region[county == "Tompkins", sum(population)],
	region[county != "Tompkins", sum(population)]),
	times = unlist(locDT[, .N, by = data][, 2]))]
	locDT <- locDT[, pc_tests := total_number_of_tests / population]
	locDT <- locDT[, percent_positive := 100 * new_positives / total_number_of_tests]
	locDT <- locDT[, pctPosMn := frollmean(percent_positive, 7, na.rm = TRUE)]

	#
	# Get Tompkins county hospitalization data and add to what I already have
	#
	prevData <- fread("../tcDOHactiveHosp.tsv")
	orevData <- prevData[, date := as.Date(date)]
	curlCmd <- paste0("curl -s https://tompkinscountyny.gov/health#table",
	" \| grep -B 8 'END TABLE' \| head -1 \|",
	" sed 's/\\s\\+//g' \| sed 's/<\\/td>//'")
	ahToday <- system(curlCmd, intern = FALSE)
	prevData <- rbind(prevData,
	data.table(date = Sys.Date(), active.hospitalizations = ahToday))
	fwrite(prevData, file="../tcDOHactiveHosp.tsv", sep = "\t", quote = FALSE, na = "NA")
	prevData <- prevData[,
	rlMean := frollmean(active.hospitalizations, 7, na.rm = TRUE)]
	#
	# Plot and export to HTML
	#
	ggp <- ggplot(data = locDT,
	aes(x = test_date, y = new_positives, fill = data)) +
	geom_col() +
	theme_classic(base_size = 18) +
	theme(legend.title = element_blank(), axis.title.x = element_blank()) +
	ylab("daily number of positives")
	py <- plotly::ggplotly(ggp)
	htmlwidgets::saveWidget(plotly::as_widget(py), "positivesPlot.html")

	ggp <- ggplot(data = locDT,
	aes(x = test_date, y = total_number_of_tests, fill = data)) +
	geom_col() +
	theme_classic(base_size = 18) +
	theme(legend.title = element_blank(), axis.title.x = element_blank()) +
	ylab("daily number of tests")
	py <- plotly::ggplotly(ggp)
	htmlwidgets::saveWidget(plotly::as_widget(py), "testsPlot.html")

	ggp <- ggplot(data = locDT, aes(x = test_date, y = pc_tests, fill = data)) +
	geom_col() +
	theme_classic(base_size = 18) +
	theme(legend.title = element_blank(), axis.title.x = element_blank()) +
	ylab("daily number of tests per capita")
	py <- plotly::ggplotly(ggp)
	htmlwidgets::saveWidget(plotly::as_widget(py), "pcTestsPlot.html")

	ggp <- ggplot(data = locDT,
	aes(x = test_date, y = percent_positive, color = data)) +
	geom_point(size = 1, alpha = 0.5) +
	geom_line(data = locDT,
	aes(x = test_date, y = pctPosMn, color = data), size = 1.2) +
	theme_classic(base_size = 18) +
	theme(legend.title = element_blank(), axis.title.x = element_blank()) +
	ylab("% positive")
	py <- plotly::ggplotly(ggp)
	htmlwidgets::saveWidget(plotly::as_widget(py), "pctPositivePlot.html")

	ggp <- ggplot(data = prevData, aes(x = date, y = active.hospitalizations)) +
	geom_col(fill = "grey60") +
	theme_classic(base_size = 18) +
	ylab("number of people hospitalized")
	py <- plotly::ggplotly(ggp)
	htmlwidgets::saveWidget(plotly::as_widget(py), "tcDOHhosp.html")