Skip to content

Instantly share code, notes, and snippets.

@jehiah
Created April 4, 2020 02:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jehiah/052c5d76dfc3ca6e2dfd0244d16e6650 to your computer and use it in GitHub Desktop.
Save jehiah/052c5d76dfc3ca6e2dfd0244d16e6650 to your computer and use it in GitHub Desktop.
Generate CSV of daily deltas from nychealth/coronavirus-data
#!/bin/zsh
#####
# This script provides a delta history of all updates to case-hosp-death.csv
# based on the AS_OF from summary.csv
# https://github.com/nychealth/coronavirus-data/blob/master/case-hosp-death.csv
# https://github.com/nychealth/coronavirus-data/blob/master/summary.csv
#
# By: Jehiah Czebotar
# https://jehiah.cz/
#
#
# Usage:
#
# run from a checkout of https://github.com/nychealth/coronavirus-data
#####
# Find all the commits that touched case-hosp-death.csv. `git log` lists the
# newest commit first, so the last array element is the first version.
COMMITS=($(git log --format="%H" -- case-hosp-death.csv))
if [ "${#COMMITS[@]}" -eq 0 ]; then
  # Without this guard the commit part of "<commit>:<path>" below would be
  # empty and `git show` would read the index copy instead of failing.
  echo "error: no history found for case-hosp-death.csv; run from a checkout of https://github.com/nychealth/coronavirus-data" >&2
  exit 1
fi
# Field separators used when splitting a CSV row with `read`:
# newline, carriage return, tab, space and comma.
NEW_IFS=$'\n\r\t ,'
# header
echo "AS_OF,DATE_OF_INTEREST,NEW_COVID_CASE_COUNT,HOSPITALIZED_CASE_COUNT,DEATH_COUNT"
# Output the values from the first version of case-hosp-death.csv.
# The AS_OF label for that version is hard-coded here.
AS_OF="March 25, 5:30 p.m." # from 2d90af0522c65ee77d6287a1f0469264648e3681
# zsh arrays are 1-indexed, so index ${#COMMITS[@]} is the last (oldest) entry.
# The CR is passed to sed literally via $'...' so that both GNU and BSD sed
# strip a trailing carriage return from every line.
ORIGINAL_CSV=$(git show "${COMMITS[${#COMMITS[@]}]}:case-hosp-death.csv" | sed -e $'s/\r$//')
# The here-string appends a final newline, so a file that lacks a trailing
# newline still has its last row read; -r keeps backslashes literal.
while IFS="$NEW_IFS" read -r DATE_OF_INTEREST NEW_COVID_CASE_COUNT HOSPITALIZED_CASE_COUNT DEATH_COUNT; do
  # ignore the header; $DATE_OF_INTEREST might have BOM so check second field
  if [ "$NEW_COVID_CASE_COUNT" = "NEW_COVID_CASE_COUNT" ]; then
    continue
  fi
  printf '"%s",%s,%s,%s,%s\n' "$AS_OF" "$DATE_OF_INTEREST" "$NEW_COVID_CASE_COUNT" "$HOSPITALIZED_CASE_COUNT" "$DEATH_COUNT"
done <<< "$ORIGINAL_CSV"
# iterate each following version, and output only the delta

# Counts from the previous version of the file, keyed by DATE_OF_INTEREST.
declare -A CASE_MAP
declare -A HOSPITALIZED_MAP
declare -A DEATH_MAP

# Set DELTA_RESULT to the change from the previous count ($2) to the current
# count ($1). When either side is empty there is nothing to subtract, so the
# current value is passed through unchanged.
_count_delta() {
  if [ -z "$1" ] || [ -z "$2" ]; then
    DELTA_RESULT="$1"
  else
    DELTA_RESULT=$(( $1 - $2 ))
  fi
}

# Succeed when at least one of the given deltas is non-empty and non-zero,
# i.e. when the row carries a change worth reporting.
_row_changed() {
  local delta
  for delta in "$@"; do
    if [ -n "$delta" ] && [ "$delta" != 0 ]; then
      return 0
    fi
  done
  return 1
}

# Print case-hosp-death.csv as stored in commit $1, with a stray carriage
# return stripped from the end of every line. The CR is passed to sed
# literally via $'...' so both GNU and BSD sed handle it.
_csv_at_commit() {
  git show "$1:case-hosp-death.csv" | sed -e $'s/\r$//'
}

# COMMITS is newest-first and zsh arrays are 1-indexed: index 1 is the most
# recent commit and index ${#COMMITS[@]} the oldest. Walk from the
# second-oldest commit to the newest and stop at 1 -- index 0 is empty in zsh,
# which would make `git show` read the index instead of a commit.
for (( i = ${#COMMITS[@]} - 1; i >= 1; i-- )); do
  # compare between $COMMITS[$i] (more recent) and $COMMITS[$i + 1] (previous)
  CASE_MAP=()
  HOSPITALIZED_MAP=()
  DEATH_MAP=()
  # pre-processing. handle missing trailing newline, and stray carriage return
  # (the here-strings below append the final newline)
  PREVIOUS_CSV=$(_csv_at_commit "${COMMITS[$(( i + 1 ))]}")
  CURRENT_CSV=$(_csv_at_commit "${COMMITS[$i]}")
  # AS_OF is the second double-quote-delimited field on the last line of
  # summary.csv at the same commit.
  AS_OF=$(git show "${COMMITS[$i]}:summary.csv" | tail -1 | awk -F '"' '{print $2}')

  # Load the previous version into the maps. A here-string (not a pipeline)
  # keeps the loop in the current shell so the assignments persist.
  while IFS="$NEW_IFS" read -r DATE_OF_INTEREST NEW_COVID_CASE_COUNT HOSPITALIZED_CASE_COUNT DEATH_COUNT; do
    # ignore the header; $DATE_OF_INTEREST might have BOM so check second field
    if [ "$NEW_COVID_CASE_COUNT" = "NEW_COVID_CASE_COUNT" ]; then
      continue
    fi
    # a row without a date cannot be used as a map key
    if [ -z "$DATE_OF_INTEREST" ]; then
      continue
    fi
    CASE_MAP[${DATE_OF_INTEREST}]=$NEW_COVID_CASE_COUNT
    HOSPITALIZED_MAP[${DATE_OF_INTEREST}]=$HOSPITALIZED_CASE_COUNT
    # drop anything from the first whitespace character onwards
    DEATH_MAP[${DATE_OF_INTEREST}]=${DEATH_COUNT%%[[:space:]]*}
  done <<< "$PREVIOUS_CSV"

  # Walk the current version and report each row whose counts changed.
  while IFS="$NEW_IFS" read -r DATE_OF_INTEREST NEW_COVID_CASE_COUNT HOSPITALIZED_CASE_COUNT DEATH_COUNT; do
    # ignore the header; $DATE_OF_INTEREST might have BOM so check second field
    if [ "$NEW_COVID_CASE_COUNT" = "NEW_COVID_CASE_COUNT" ]; then
      continue
    fi
    if [ -z "$DATE_OF_INTEREST" ]; then
      continue
    fi
    # DEBUG
    # echo "DATE_OF_INTEREST $DATE_OF_INTEREST CASE $NEW_COVID_CASE_COUNT H $HOSPITALIZED_CASE_COUNT D $DEATH_COUNT"
    # echo "PREV CASE ${CASE_MAP[$DATE_OF_INTEREST]} H ${HOSPITALIZED_MAP[$DATE_OF_INTEREST]} D ${DEATH_MAP[$DATE_OF_INTEREST]}"
    _count_delta "$NEW_COVID_CASE_COUNT" "${CASE_MAP[$DATE_OF_INTEREST]}"
    CASE_DELTA=$DELTA_RESULT
    _count_delta "$HOSPITALIZED_CASE_COUNT" "${HOSPITALIZED_MAP[$DATE_OF_INTEREST]}"
    HOSPITALIZED_DELTA=$DELTA_RESULT
    _count_delta "$DEATH_COUNT" "${DEATH_MAP[$DATE_OF_INTEREST]}"
    DEATH_DELTA=$DELTA_RESULT
    # Emit the row when ANY of the three counts changed; a row whose deltas
    # are all zero or empty carries no new information.
    if _row_changed "$CASE_DELTA" "$HOSPITALIZED_DELTA" "$DEATH_DELTA"; then
      printf '"%s",%s,%s,%s,%s\n' "$AS_OF" "$DATE_OF_INTEREST" "$CASE_DELTA" "$HOSPITALIZED_DELTA" "$DEATH_DELTA"
    fi
  done <<< "$CURRENT_CSV"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment