Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save n0531m/0c542252da8474dcf342db8add0ab88f to your computer and use it in GitHub Desktop.
Save n0531m/0c542252da8474dcf342db8add0ab88f to your computer and use it in GitHub Desktop.
covic19_publicdataset_locationname_fluctuation
#DECLARE # PRECISION INT64 DEFAULT 6;
DECLARE PRECISION_GEOHASH INT64 DEFAULT 2;
WITH # first table : only aggregating the rows with exact match on country_region, province_state, longitude, latitude
# also rounding lat/lng to a set precision. (defined in declard param)
# also adding a geohash value based point point
t1 AS
( SELECT CASE
WHEN country_region IN ("South Korea",
"Korea, South",
"Republic of Korea") THEN "South Korea"
WHEN country_region IN ("Mainland China") THEN "China"
WHEN country_region IN ("Vietnam",
"Viet Nam") THEN "Vietnam"
WHEN country_region IN ("Bahamas",
"The Bahamas") THEN "Bahamas"
WHEN country_region IN ("Russia",
"Russian Federation") THEN "Russia"
WHEN country_region IN ("Gambia",
"The Gambia") THEN "Gambia"
WHEN country_region IN ("Moldova",
"Republic of Moldova") THEN "Moldova"
WHEN country_region IN ("Gambia",
"Gambia, The") THEN "Gambia"
WHEN country_region IN ("Taiwan",
"Taiwan*",
"Taipei and environs") THEN "Taiwan"
WHEN country_region IN ("Republic of Ireland",
"Ireland") THEN "Ireland"
WHEN country_region IN ("Iran (Islamic Republic of)",
"Iran") THEN "Iran"
WHEN country_region IN ("Czechia",
"Czech Republic") THEN "Czech Republic"
WHEN province_state IN ("Hong Kong") THEN "Hong Kong"
WHEN province_state IN ("Macau") THEN "Macau"
WHEN country_region IN ("Guam")
AND province_state IS NULL THEN "US"
WHEN country_region IN ("Cayman Islands")
AND province_state IS NULL THEN "United Kingdom"
WHEN country_region IN ("Aruba")
AND province_state IS NULL THEN "Netherlands"
WHEN country_region IN ("Curacao")
AND province_state IS NULL THEN "Netherlands"
WHEN country_region IN ("Guadeloupe")
AND province_state IS NULL THEN "France"
WHEN country_region IN ("Mayotte")
AND province_state IS NULL THEN "France"
WHEN country_region IN ("French Guiana")
AND province_state IS NULL THEN "France"
WHEN country_region IN ("Saint Barthelemy")
AND province_state IS NULL THEN "France"
WHEN country_region IN ("Gibraltar")
AND province_state IS NULL THEN "United Kingdom"
WHEN country_region IN ("US")
AND province_state IN ("Puerto Rico") THEN "Puerto Rico"
ELSE trim(country_region)
END AS country_region,
CASE
WHEN country_region = province_state THEN NULL
WHEN country_region IN ("US")
AND province_state IN ("Virgin Islands",
"Virgin Islands, U.S.",
"United States Virgin Islands") THEN "Virgin Islands"
WHEN country_region IN ("US")
AND province_state IN ("Santa Clara, CA",
"Santa Clara County, CA") THEN "Santa Clara Country, CA"
WHEN country_region IN ("US")
AND province_state IN ("Orange, CA",
"Orange County, CA") THEN "Orange Country, CA"
WHEN province_state IN ("Fench Guiana") THEN "French Guiana"
WHEN country_region IN ("Guam")
AND province_state IS NULL THEN "Guam"
WHEN country_region IN ("Cayman Islands")
AND province_state IS NULL THEN "Cayman Islands"
WHEN country_region IN ("Aruba")
AND province_state IS NULL THEN "Aruba"
WHEN country_region IN ("Curacao")
AND province_state IS NULL THEN "Curacao"
WHEN country_region IN ("Guadeloupe")
AND province_state IS NULL THEN "Guadeloupe"
WHEN country_region IN ("Mayotte")
AND province_state IS NULL THEN "Mayotte"
WHEN country_region IN ("French Guiana")
AND province_state IS NULL THEN "French Guiana"
WHEN country_region IN ("US")
AND province_state IN ("Puerto Rico") THEN NULL
WHEN country_region IN ("Saint Barthelemy")
AND province_state IS NULL THEN "Saint Barthelemy"
WHEN country_region IN ("Gibraltar")
AND province_state IS NULL THEN "Gibraltar"
WHEN country_region IN ("United Kingdom")
AND province_state IN ("UK") THEN NULL
WHEN province_state IN ("Hong Kong") THEN NULL
WHEN province_state IN ("Macau") THEN NULL
WHEN province_state IN ("Taiwan") THEN NULL #when province_state IN ("",
"") THEN ""
ELSE trim(province_state)
END AS province_state,
#ROUND(latitude,PRECISION) AS lat,
latitude,
longitude,
#ROUND(longitude,PRECISION) AS lng,
ST_GEOGPOINT(longitude,latitude) AS point,
ST_GEOHASH(ST_GEOGPOINT(longitude, latitude), PRECISION_GEOHASH) AS geohash,
COUNT(*) AS numOfRows
FROM `bigquery-public-data.covid19_jhu_csse.summary`
GROUP BY country_region,
province_state,
longitude,
latitude), # second table aggregated based on rounded numbers
t2 AS
( SELECT country_region,
province_state, # lat AS latitude,
# lng AS longitude,
latitude,
longitude,
ST_ASTEXT(point) AS point,
geohash ,
SUM(numOfRows) AS numOfRows
FROM t1
GROUP BY country_region,
province_state,
latitude,
longitude,
geohash,
ST_ASTEXT(point)),
t3 AS
( SELECT *
FROM t2
WHERE # excluding datapoints with missing lat/lng
geohash IS NOT NULL # excluding datapoints with latlng of (0,0)
AND geohash!=st_geohash(st_geogpoint(0, 0), PRECISION_GEOHASH) #WHERE # country_region LIKE "%Mal%" #where province_state is not null and country_region is null
#WHERE # country_region LIKE "%Korea%"
ORDER BY geohash),
t4 AS
( SELECT *
FROM t3),
t5 AS (
SELECT geohash,
#ARRAY_AGG( STRUCT(country_region, # province_state)) as labels
ARRAY_AGG(DISTINCT CONCAT( IF (country_region IS NULL, "[null]", country_region), # " || ", IF (province_state IS NULL, "[null]", province_state))) AS labels # ORDER BY
# country_region, province_state) AS labels
, sum(numOfRows) AS numOfRows
FROM t4
GROUP BY geohash #, #country_region, #province_state )
SELECT geohash,
labels,
numOfRows
FROM t5
WHERE ARRAY_LENGTH(labels)>1
ORDER BY geohash
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment