Skip to content

Instantly share code, notes, and snippets.

@1ec5
Last active May 10, 2020 20:49
Show Gist options
  • Save 1ec5/291a1594d27fa783eaa283a2e529ab75 to your computer and use it in GitHub Desktop.
Save 1ec5/291a1594d27fa783eaa283a2e529ab75 to your computer and use it in GitHub Desktop.
Update the Wikimedia Commons table of COVID-19 cases in Santa Clara County, California
#!/bin/bash
# Fetch the current table as JSON
curl 'https://commons.wikimedia.org/wiki/Data:COVID-19_Cases_in_Santa_Clara_County,_California.tab?action=raw' > commons.json
# Fetch the new cases by day from the dashboard
# Convert date from number of milliseconds to YYYY-MM-DD
# Fill in counts repeated from previous days, which are indicated by C = null, R = 2
# Calculate a running total
curl 'https://wabi-us-gov-virginia-api.analysis.usgovcloudapi.net/public/reports/querydata' --compressed --data '{"version":"1.0.0","queries":[{"Query":{"Commands":[{"SemanticQueryDataShapeCommand":{"Query":{"Version":2,"From":[{"Name":"c","Entity":"cases_date"}],"Select":[{"Column":{"Expression":{"SourceRef":{"Source":"c"}},"Property":"Date"},"Name":"cases_date.Date"},{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"c"}},"Property":"New_cases"}},"Function":0},"Name":"Sum(cases_date.New_cases)"}]},"Binding":{"Primary":{"Groupings":[{"Projections":[0,1]}]},"DataReduction":{"DataVolume":4,"Primary":{"Sample":{}}},"Version":1}}}]},"CacheKey":"{\"Commands\":[{\"SemanticQueryDataShapeCommand\":{\"Query\":{\"Version\":2,\"From\":[{\"Name\":\"c\",\"Entity\":\"cases_date\"}],\"Select\":[{\"Column\":{\"Expression\":{\"SourceRef\":{\"Source\":\"c\"}},\"Property\":\"Date\"},\"Name\":\"cases_date.Date\"},{\"Aggregation\":{\"Expression\":{\"Column\":{\"Expression\":{\"SourceRef\":{\"Source\":\"c\"}},\"Property\":\"New_cases\"}},\"Function\":0},\"Name\":\"Sum(cases_date.New_cases)\"}]},\"Binding\":{\"Primary\":{\"Groupings\":[{\"Projections\":[0,1]}]},\"DataReduction\":{\"DataVolume\":4,\"Primary\":{\"Sample\":{}}},\"Version\":1}}}]}","QueryId":"","ApplicationContext":{"DatasetId":"366bfc6b-cdb9-43a4-9208-89ffd773dfe7","Sources":[{"ReportId":"f863b97c-85d7-431e-a6bf-20549f18d10f"}]}}],"cancelQueries":[],"modelId":320392}' | jq '.results[].result.data | .dsr.DS[].PH[].DM0[] | {date: ((.C[0] | . / 1000) | strftime("%Y-%m-%d")), newCases: .C[1]}' | jq -s 'foreach .[] as $row (0; ($row.newCases // .); . as $x | $row | (.newCases = (.newCases // $x)))' | jq -s 'foreach .[] as $row (0; . + $row.newCases; . as $x | $row | (.totalConfirmedCases = $x))' | jq -s 'map({(.date): . | del(.date)}) | add' > casesbyday.json
# Fetch the dashboard's UI, which contains the last updated date and number of undated cases
curl 'https://wabi-us-gov-virginia-api.analysis.usgovcloudapi.net/public/reports/e8619a1d-ea98-4d27-a860-157af0d4e93f/modelsAndExploration?preferReadOnlySession=true' --compressed -H 'X-PowerBI-ResourceKey: e8619a1d-ea98-4d27-a860-157af0d4e93f' > ui.json
# Fetch the current total number of cases, including undated cases
curl 'https://wabi-us-gov-virginia-api.analysis.usgovcloudapi.net/public/reports/querydata' --compressed --data $'{"version":"1.0.0","queries":[{"Query":{"Commands":[{"SemanticQueryDataShapeCommand":{"Query":{"Version":2,"From":[{"Name":"c","Entity":"counts"}],"Select":[{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"c"}},"Property":"Total"}},"Function":0},"Name":"Sum(counts.Total)"}],"Where":[{"Condition":{"In":{"Expressions":[{"Column":{"Expression":{"SourceRef":{"Source":"c"}},"Property":"Category"}}],"Values":[[{"Literal":{"Value":"\'Cases\'"}}]]}}}]},"Binding":{"Primary":{"Groupings":[{"Projections":[0]}]},"DataReduction":{"DataVolume":3,"Primary":{"Top":{}}},"Version":1}}}]},"CacheKey":"{\\"Commands\\":[{\\"SemanticQueryDataShapeCommand\\":{\\"Query\\":{\\"Version\\":2,\\"From\\":[{\\"Name\\":\\"c\\",\\"Entity\\":\\"counts\\"}],\\"Select\\":[{\\"Aggregation\\":{\\"Expression\\":{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"c\\"}},\\"Property\\":\\"Total\\"}},\\"Function\\":0},\\"Name\\":\\"Sum(counts.Total)\\"}],\\"Where\\":[{\\"Condition\\":{\\"In\\":{\\"Expressions\\":[{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"c\\"}},\\"Property\\":\\"Category\\"}}],\\"Values\\":[[{\\"Literal\\":{\\"Value\\":\\"\'Cases\'\\"}}]]}}}]},\\"Binding\\":{\\"Primary\\":{\\"Groupings\\":[{\\"Projections\\":[0]}]},\\"DataReduction\\":{\\"DataVolume\\":3,\\"Primary\\":{\\"Top\\":{}}},\\"Version\\":1}}}]}","QueryId":"","ApplicationContext":{"DatasetId":"366bfc6b-cdb9-43a4-9208-89ffd773dfe7","Sources":[{"ReportId":"f863b97c-85d7-431e-a6bf-20549f18d10f"}]}}],"cancelQueries":[],"modelId":320392}' > totalcases.json
# Fetch the current number of hospitalizations and isolate the number
curl 'https://wabi-us-gov-virginia-api.analysis.usgovcloudapi.net/public/reports/querydata' --compressed --data $'{"version":"1.0.0","queries":[{"Query":{"Commands":[{"SemanticQueryDataShapeCommand":{"Query":{"Version":2,"From":[{"Name":"h","Entity":"hosp_CHA"},{"Name":"subquery","Expression":{"Subquery":{"Query":{"Version":2,"From":[{"Name":"h1","Entity":"hosp_CHA"}],"Select":[{"Column":{"Expression":{"SourceRef":{"Source":"h1"}},"Property":"Date"},"Name":"field"}],"OrderBy":[{"Direction":2,"Expression":{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"h1"}},"Property":"Date"}},"Function":4}}}],"Top":1}}},"Type":2}],"Select":[{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"h"}},"Property":"COVID_pts"}},"Function":0},"Name":"Sum(hosp_CHA.COVID_pts)"}],"Where":[{"Condition":{"In":{"Expressions":[{"Column":{"Expression":{"SourceRef":{"Source":"h"}},"Property":"Date"}}],"Table":{"SourceRef":{"Source":"subquery"}}}}}]},"Binding":{"Primary":{"Groupings":[{"Projections":[0]}]},"DataReduction":{"DataVolume":3,"Primary":{"Top":{}}},"Version":1}}}]},"CacheKey":"{\\"Commands\\":[{\\"SemanticQueryDataShapeCommand\\":{\\"Query\\":{\\"Version\\":2,\\"From\\":[{\\"Name\\":\\"h\\",\\"Entity\\":\\"hosp_CHA\\"},{\\"Name\\":\\"subquery\\",\\"Expression\\":{\\"Subquery\\":{\\"Query\\":{\\"Version\\":2,\\"From\\":[{\\"Name\\":\\"h1\\",\\"Entity\\":\\"hosp_CHA\\"}],\\"Select\\":[{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"h1\\"}},\\"Property\\":\\"Date\\"},\\"Name\\":\\"field\\"}],\\"OrderBy\\":[{\\"Direction\\":2,\\"Expression\\":{\\"Aggregation\\":{\\"Expression\\":{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"h1\\"}},\\"Property\\":\\"Date\\"}},\\"Function\\":4}}}],\\"Top\\":1}}},\\"Type\\":2}],\\"Select\\":[{\\"Aggregation\\":{\\"Expression\\":{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"h\\"}},\\"Property\\":\\"COVID_pts\\"}},\\"Function\\":0},\\"Name\\":\\"Sum(hosp_CHA.COVID_pts)\\"}],\\"Where\\":[{\\"Condition\\":{\\"In\\":{\\"Expressions\\":[{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"h\\"}},\\"Property\\":\\"Date\\"}}],\\"Table\\":{\\"SourceRef\\":{\\"Source\\":\\"subquery\\"}}}}}]},\\"Binding\\":{\\"Primary\\":{\\"Groupings\\":[{\\"Projections\\":[0]}]},\\"DataReduction\\":{\\"DataVolume\\":3,\\"Primary\\":{\\"Top\\":{}}},\\"Version\\":1}}}]}","QueryId":"","ApplicationContext":{"DatasetId":"366bfc6b-cdb9-43a4-9208-89ffd773dfe7","Sources":[{"ReportId":"f863b97c-85d7-431e-a6bf-20549f18d10f"}]}}],"cancelQueries":[],"modelId":320392}' | jq '.results[].result.data | [.descriptor.Select[].Name, .dsr.DS[].PH[].DM0[].M0]' | jq -s 'map({(.[0]): (.[1])}) | add | .["Sum(hosp_CHA.COVID_pts)"]' > hosp.json
# Fetch the current death toll and isolate the number
curl 'https://wabi-us-gov-virginia-api.analysis.usgovcloudapi.net/public/reports/querydata' --compressed --data $'{"version":"1.0.0","queries":[{"Query":{"Commands":[{"SemanticQueryDataShapeCommand":{"Query":{"Version":2,"From":[{"Name":"c","Entity":"counts"}],"Select":[{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"c"}},"Property":"Total"}},"Function":0},"Name":"Sum(counts.Total)"}],"Where":[{"Condition":{"In":{"Expressions":[{"Column":{"Expression":{"SourceRef":{"Source":"c"}},"Property":"Category"}}],"Values":[[{"Literal":{"Value":"\'Deaths\'"}}]]}}}]},"Binding":{"Primary":{"Groupings":[{"Projections":[0]}]},"DataReduction":{"DataVolume":3,"Primary":{"Top":{}}},"Version":1}}}]},"CacheKey":"{\\"Commands\\":[{\\"SemanticQueryDataShapeCommand\\":{\\"Query\\":{\\"Version\\":2,\\"From\\":[{\\"Name\\":\\"c\\",\\"Entity\\":\\"counts\\"}],\\"Select\\":[{\\"Aggregation\\":{\\"Expression\\":{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"c\\"}},\\"Property\\":\\"Total\\"}},\\"Function\\":0},\\"Name\\":\\"Sum(counts.Total)\\"}],\\"Where\\":[{\\"Condition\\":{\\"In\\":{\\"Expressions\\":[{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"c\\"}},\\"Property\\":\\"Category\\"}}],\\"Values\\":[[{\\"Literal\\":{\\"Value\\":\\"\'Deaths\'\\"}}]]}}}]},\\"Binding\\":{\\"Primary\\":{\\"Groupings\\":[{\\"Projections\\":[0]}]},\\"DataReduction\\":{\\"DataVolume\\":3,\\"Primary\\":{\\"Top\\":{}}},\\"Version\\":1}}}]}","QueryId":"","ApplicationContext":{"DatasetId":"366bfc6b-cdb9-43a4-9208-89ffd773dfe7","Sources":[{"ReportId":"f863b97c-85d7-431e-a6bf-20549f18d10f"}]}}],"cancelQueries":[],"modelId":320392}' | jq '.results[].result.data | [.descriptor.Select[].Name, .dsr.DS[].PH[].DM0[].M0]' | jq -s 'map({(.[0]): (.[1])}) | add | .["Sum(counts.Total)"]' > deaths.json
# Compile statistics for today
# Convert last updated date from number of milliseconds to YYYY-MM-DD
# Total case count for today is the total case count as of today minus the undated case count
jq -s '{date: (.[0].models[].lastRefreshTime | match("/Date\\((\\d+)\\)/").captures[0].string | tonumber | . / 1000 | strftime("%Y-%m-%d")), totalConfirmedCases: (.[1].results[].result.data.dsr.DS[].PH[].DM0[].M0), undatedCases: (.[0].exploration.sections[].visualContainers[].config | match("The graphs do not include (\\d+) patients").captures[0].string | tonumber), hospitalizations: .[2], deaths: .[3]} | .totalConfirmedCases -= .undatedCases' ui.json totalcases.json hosp.json deaths.json > today.json
# Update the table's existing entries with new data from the dashboard, adding a row for today
# New case count for today is total case count for today minus total case count for yesterday
jq -s --tab '.[1] as $new | .[2] as $today | .[0] | .data = [.data[] | $new[.[0]] as $newDay | .[1] = ($newDay.newCases // .[1]) | .[2] = ($newDay.totalConfirmedCases // .[2])] | .data[-1] as $yesterday | .data += [[$today.date, ($today.totalConfirmedCases | . - $yesterday[2]), $today.totalConfirmedCases, $today.hospitalizations, $today.deaths, null, null, null, $today.undatedCases, "[https://www.sccgov.org/sites/phd/DiseaseInformation/novel-coronavirus/Pages/dashboard.aspx Santa Clara County Public Health]"]]' commons.json casesbyday.json today.json | expand -t4
@1ec5
Copy link
Author

1ec5 commented May 5, 2020

To update the Wikimedia Commons table of COVID-19 cases in Santa Clara County, California, based on the county health department’s coronavirus data dashboard, first install jq, then run this Bash script. The URLs come from inspecting the main Power BI dashboard’s network traffic, then trimming the requests down to just what's needed for the table on Wikimedia Commons.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment