Skip to content

Instantly share code, notes, and snippets.

@tonymugen

tonymugen/flxCovidTrack.R

Last active Aug 2, 2020
Embed
What would you like to do?
#
# Copyright (c) 2020 Anthony J. Greenberg
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
#
library(data.table)
library(ggplot2)
#
# This script downloads New York State COVID-19 test data and plots various statistics.
# The plots are exported as HTML widgets for embedding on a website.
# Plotting and export require ggplot2, plotly, and htmlwidgets. Data wrangling requires data.table.
# Interactions with the New York State DOH API require RSocrata.
#
# Download the NYS data.
nysDT <- as.data.table(RSocrata::read.socrata(
"https://health.data.ny.gov/resource/xdss-u53e.json",
stringsAsFactors=FALSE))
nysDT <- nysDT[, test_date := as.Date(test_date)]
num.cols <- c("new_positives", "cumulative_number_of_positives",
"total_number_of_tests", "cumulative_number_of_tests")
nysDT <- nysDT[, (num.cols) := lapply(.SD, as.integer), .SDcols = num.cols]
#
# Extract the data for Tompkins and surrounding counties.
#
region <- data.table(county = c("Tompkins", "Tioga", "Chemung",
"Schuyler", "Seneca", "Cayuga",
"Cortland", "Broome", "Onondaga"),
population = c(103000, 49000, 86000, 18000, 35000,
78000, 48000, 194000, 464000))
nysDT <- nysDT[county %in% region[, county],]
locDT <- nysDT[county != "Tompkins", lapply(.SD, sum), by = test_date,
.SDcols = c("new_positives", "cumulative_number_of_positives",
"total_number_of_tests", "cumulative_number_of_tests")]
tomDT <- nysDT[county == "Tompkins",
c("test_date", "new_positives", "cumulative_number_of_positives",
"total_number_of_tests", "cumulative_number_of_tests")]
#
# Separate the Tompkins and whole region data; calculate per capita statistics.
#
tomDT <- tomDT[, data := "Tompkins"]
locDT <- locDT[, data := "region"]
locDT <- rbind(tomDT, locDT)
locDT <- locDT[, population := rep(
c(region[county == "Tompkins", sum(population)],
region[county != "Tompkins", sum(population)]),
times = unlist(locDT[, .N, by = data][, 2]))]
locDT <- locDT[, pc_tests := total_number_of_tests / population]
locDT <- locDT[, percent_positive := 100 * new_positives / total_number_of_tests]
locDT <- locDT[, pctPosMn := frollmean(percent_positive, 7, na.rm = TRUE)]
#
# Get Tompkins county hospitalization data and add to what I already have
#
prevData <- fread("../tcDOHactiveHosp.tsv")
orevData <- prevData[, date := as.Date(date)]
curlCmd <- paste0("curl -s https://tompkinscountyny.gov/health#table",
" | grep -B 8 'END TABLE' | head -1 |",
" sed 's/\\s\\+//g' | sed 's/<\\/td>//'")
ahToday <- system(curlCmd, intern = FALSE)
prevData <- rbind(prevData,
data.table(date = Sys.Date(), active.hospitalizations = ahToday))
fwrite(prevData, file="../tcDOHactiveHosp.tsv", sep = "\t", quote = FALSE, na = "NA")
prevData <- prevData[,
rlMean := frollmean(active.hospitalizations, 7, na.rm = TRUE)]
#
# Plot and export to HTML
#
ggp <- ggplot(data = locDT,
aes(x = test_date, y = new_positives, fill = data)) +
geom_col() +
theme_classic(base_size = 18) +
theme(legend.title = element_blank(), axis.title.x = element_blank()) +
ylab("daily number of positives")
py <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "positivesPlot.html")
ggp <- ggplot(data = locDT,
aes(x = test_date, y = total_number_of_tests, fill = data)) +
geom_col() +
theme_classic(base_size = 18) +
theme(legend.title = element_blank(), axis.title.x = element_blank()) +
ylab("daily number of tests")
py <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "testsPlot.html")
ggp <- ggplot(data = locDT, aes(x = test_date, y = pc_tests, fill = data)) +
geom_col() +
theme_classic(base_size = 18) +
theme(legend.title = element_blank(), axis.title.x = element_blank()) +
ylab("daily number of tests per capita")
py <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "pcTestsPlot.html")
ggp <- ggplot(data = locDT,
aes(x = test_date, y = percent_positive, color = data)) +
geom_point(size = 1, alpha = 0.5) +
geom_line(data = locDT,
aes(x = test_date, y = pctPosMn, color = data), size = 1.2) +
theme_classic(base_size = 18) +
theme(legend.title = element_blank(), axis.title.x = element_blank()) +
ylab("% positive")
py <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "pctPositivePlot.html")
ggp <- ggplot(data = prevData, aes(x = date, y = active.hospitalizations)) +
geom_col(fill = "grey60") +
theme_classic(base_size = 18) +
ylab("number of people hospitalized")
py <- plotly::ggplotly(ggp)
htmlwidgets::saveWidget(plotly::as_widget(py), "tcDOHhosp.html")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment