# | |
# Copyright (c) 2020 Anthony J. Greenberg | |
# | |
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: | |
# | |
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. | |
# | |
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. | |
# | |
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. | |
# | |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | |
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS | |
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER | |
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |
# THE POSSIBILITY OF SUCH DAMAGE. | |
# | |
library(data.table) | |
library(ggplot2) | |
# | |
# This script downloads New York State COVID-19 test data and plots various statistics. | |
# The plots are exported as HTML widgets for embedding on a website. | |
# Plotting and export require ggplot2, plotly, and htmlwidgets. Data wrangling requires data.table. | |
# Interactions with the New York State DOH API require RSocrata. | |
# | |
# Download the NYS data. | |
nysDT <- as.data.table(RSocrata::read.socrata( | |
"https://health.data.ny.gov/resource/xdss-u53e.json", | |
stringsAsFactors=FALSE)) | |
nysDT <- nysDT[, test_date := as.Date(test_date)] | |
num.cols <- c("new_positives", "cumulative_number_of_positives", | |
"total_number_of_tests", "cumulative_number_of_tests") | |
nysDT <- nysDT[, (num.cols) := lapply(.SD, as.integer), .SDcols = num.cols] | |
# | |
# Extract the data for Tompkins and surrounding counties. | |
# | |
region <- data.table(county = c("Tompkins", "Tioga", "Chemung", | |
"Schuyler", "Seneca", "Cayuga", | |
"Cortland", "Broome", "Onondaga"), | |
population = c(103000, 49000, 86000, 18000, 35000, | |
78000, 48000, 194000, 464000)) | |
nysDT <- nysDT[county %in% region[, county],] | |
locDT <- nysDT[county != "Tompkins", lapply(.SD, sum), by = test_date, | |
.SDcols = c("new_positives", "cumulative_number_of_positives", | |
"total_number_of_tests", "cumulative_number_of_tests")] | |
tomDT <- nysDT[county == "Tompkins", | |
c("test_date", "new_positives", "cumulative_number_of_positives", | |
"total_number_of_tests", "cumulative_number_of_tests")] | |
# | |
# Separate the Tompkins and whole region data; calculate per capita statistics. | |
# | |
tomDT <- tomDT[, data := "Tompkins"] | |
locDT <- locDT[, data := "region"] | |
locDT <- rbind(tomDT, locDT) | |
locDT <- locDT[, population := rep( | |
c(region[county == "Tompkins", sum(population)], | |
region[county != "Tompkins", sum(population)]), | |
times = unlist(locDT[, .N, by = data][, 2]))] | |
locDT <- locDT[, pc_tests := total_number_of_tests / population] | |
locDT <- locDT[, percent_positive := 100 * new_positives / total_number_of_tests] | |
locDT <- locDT[, pctPosMn := frollmean(percent_positive, 7, na.rm = TRUE)] | |
# | |
# Get Tompkins county hospitalization data and add to what I already have | |
# | |
prevData <- fread("../tcDOHactiveHosp.tsv") | |
orevData <- prevData[, date := as.Date(date)] | |
curlCmd <- paste0("curl -s https://tompkinscountyny.gov/health#table", | |
" | grep -B 8 'END TABLE' | head -1 |", | |
" sed 's/\\s\\+//g' | sed 's/<\\/td>//'") | |
ahToday <- system(curlCmd, intern = FALSE) | |
prevData <- rbind(prevData, | |
data.table(date = Sys.Date(), active.hospitalizations = ahToday)) | |
fwrite(prevData, file="../tcDOHactiveHosp.tsv", sep = "\t", quote = FALSE, na = "NA") | |
prevData <- prevData[, | |
rlMean := frollmean(active.hospitalizations, 7, na.rm = TRUE)] | |
# | |
# Plot and export to HTML | |
# | |
ggp <- ggplot(data = locDT, | |
aes(x = test_date, y = new_positives, fill = data)) + | |
geom_col() + | |
theme_classic(base_size = 18) + | |
theme(legend.title = element_blank(), axis.title.x = element_blank()) + | |
ylab("daily number of positives") | |
py <- plotly::ggplotly(ggp) | |
htmlwidgets::saveWidget(plotly::as_widget(py), "positivesPlot.html") | |
ggp <- ggplot(data = locDT, | |
aes(x = test_date, y = total_number_of_tests, fill = data)) + | |
geom_col() + | |
theme_classic(base_size = 18) + | |
theme(legend.title = element_blank(), axis.title.x = element_blank()) + | |
ylab("daily number of tests") | |
py <- plotly::ggplotly(ggp) | |
htmlwidgets::saveWidget(plotly::as_widget(py), "testsPlot.html") | |
ggp <- ggplot(data = locDT, aes(x = test_date, y = pc_tests, fill = data)) + | |
geom_col() + | |
theme_classic(base_size = 18) + | |
theme(legend.title = element_blank(), axis.title.x = element_blank()) + | |
ylab("daily number of tests per capita") | |
py <- plotly::ggplotly(ggp) | |
htmlwidgets::saveWidget(plotly::as_widget(py), "pcTestsPlot.html") | |
ggp <- ggplot(data = locDT, | |
aes(x = test_date, y = percent_positive, color = data)) + | |
geom_point(size = 1, alpha = 0.5) + | |
geom_line(data = locDT, | |
aes(x = test_date, y = pctPosMn, color = data), size = 1.2) + | |
theme_classic(base_size = 18) + | |
theme(legend.title = element_blank(), axis.title.x = element_blank()) + | |
ylab("% positive") | |
py <- plotly::ggplotly(ggp) | |
htmlwidgets::saveWidget(plotly::as_widget(py), "pctPositivePlot.html") | |
ggp <- ggplot(data = prevData, aes(x = date, y = active.hospitalizations)) + | |
geom_col(fill = "grey60") + | |
theme_classic(base_size = 18) + | |
ylab("number of people hospitalized") | |
py <- plotly::ggplotly(ggp) | |
htmlwidgets::saveWidget(plotly::as_widget(py), "tcDOHhosp.html") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment