Skip to content

Instantly share code, notes, and snippets.

View BigDataDave1's full-sized avatar

BigDataDave BigDataDave1

View GitHub Profile
@BigDataDave1
BigDataDave1 / vhol_sept2020_wunderman.sql
Created November 25, 2020 13:19
Wunderman Snowflake SQL
--+-----------------------------------------------------------------------------------------------+--
-- Wunderman Risk & Recovery Data
--+-----------------------------------------------------------------------------------------------+--
/* --GET() Wunderman Thompson AmeriLINK Insights Data Set as "WUNDERMAN"
---> Queries we will write <--
+ Age by State
+ 100k Infections by Avg(Age) by State
+ Respitory Issues per 100k by State
+ 100k Infections by Respitory Issues per 100k by State
+ Income by State
--+-----------------------------------------------------------------------------------------------+--
-- Weather Source Data
--+-----------------------------------------------------------------------------------------------+--
--GET() Weather Source Data As "WEATHERSOURCE_PARTNER_WS_ONPOINT_WEATHER_DATA_SHARE"
SELECT COUNT(*)
FROM WEATHERSOURCE_PARTNER_WS_ONPOINT_WEATHER_DATA_SHARE.PUBLIC.HISTORY_DAY_COVID_19;
--Simple Test Query
SELECT DATE_VALID_STD, AVG(AVG_TEMPERATURE_AIR_2M_F)
FROM WEATHERSOURCE_PARTNER_WS_ONPOINT_WEATHER_DATA_SHARE.PUBLIC.HISTORY_DAY_COVID_19
@BigDataDave1
BigDataDave1 / vhol_sept2020_safegraph.sql
Created November 25, 2020 13:16
Safegraph Data SQL
--+-----------------------------------------------------------------------------------------------+--
-- Safe Graph Data
--+-----------------------------------------------------------------------------------------------+--
--GET() Safe Graph Data As "SAFEGRAPH_SHARE"
SELECT FIPS, PROVINCE_STATE, COUNTY FROM STARSCHEMA_COVID19.PUBLIC.JHU_COVID_19
WHERE STARSCHEMA_COVID19.PUBLIC.JHU_COVID_19.COUNTRY_REGION = $COUNTRY_VARIABLE
AND STARSCHEMA_COVID19.PUBLIC.JHU_COVID_19.DATE = TO_DATE($DATE_VARIABLE);
--Result Set: 100k Population SQL Result Set
SELECT PROVINCE_STATE, SUM(CONFIRMED_CASES)/SUM(COUNTY_POPULATION)*100000 AS CASES_PER_100K,
@BigDataDave1
BigDataDave1 / vhol_sept2020_jhu.sql
Created November 25, 2020 13:13
Starschema's JHU Data Table SQL
--+-----------------------------------------------------------------------------------------------+--
--+-----------------------------------------------------------------------------------------------+--
-- <Webinar SQL>
-- <Getting Started with the Snowflake Data Marketplace>
-- <September 9, 2020 | 10:00am PST>
-- <SQL File | David A Spezia>
-- <Sales Engineer | Snowflake>
--+-----------------------------------------------------------------------------------------------+--
--+-----------------------------------------------------------------------------------------------+--
--Session Variables
@BigDataDave1
BigDataDave1 / query10_10things.sql
Created November 23, 2020 22:12
10 Things Every Snowflake Admin Should be Doing to Optimize Credits Query 10 – Apply Resource Monitors
CREATE RESOURCE MONITOR "CURTLOVESCUBES_RM" WITH CREDIT_QUOTA = 150
TRIGGERS
ON 75 PERCENT DO NOTIFY
ON 90 PERCENT DO NOTIFY;
ALTER WAREHOUSE "CURT_WH" SET RESOURCE_MONITOR = "CURTLOVESCUBES_RM";
@BigDataDave1
BigDataDave1 / query09_10things.sql
Created November 23, 2020 22:12
10 Things Every Snowflake Admin Should be Doing to Optimize Credits Query 09 – Find Warehouses Without Resource Monitors
SHOW WAREHOUSES;
SELECT * FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))
WHERE "resource_monitor" = 'null';
@BigDataDave1
BigDataDave1 / query08_10things.sql
Created November 23, 2020 22:11
10 Things Every Snowflake Admin Should be Doing to Optimize Credits Query 08 – Dormant Users
--Never Logged In Users
SHOW USERS;
SELECT * FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))
WHERE "last_success_login" IS NULL
AND DATEDIFF('Day',"created_on",CURRENT_DATE) > 30;
--Stale Users
SHOW USERS;
SELECT * FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))
WHERE DATEDIFF('Day',"last_success_login",CURRENT_DATE) > 30;
@BigDataDave1
BigDataDave1 / query07_10things.sql
Created November 23, 2020 22:11
10 Things Every Snowflake Admin Should be Doing to Optimize Credits Query 07 – Unused Tables
--DML from the Information Schema to identify Table sizes and Last Updated Timestamps
SELECT TABLE_CATALOG || '.' || TABLE_SCHEMA || '.' || TABLE_NAME AS TABLE_PATH,
TABLE_NAME, TABLE_SCHEMA AS SCHEMA,
TABLE_CATALOG AS DATABASE, BYTES,
TO_NUMBER(BYTES / POWER(1024,3),10,2) AS GB,
LAST_ALTERED AS LAST_USE,
DATEDIFF('Day',LAST_USE,CURRENT_DATE) AS DAYS_SINCE_LAST_USE
FROM INFORMATION_SCHEMA.TABLES
WHERE DAYS_SINCE_LAST_USE > 90 --Use your Days Threshold
ORDER BY BYTES DESC;
@BigDataDave1
BigDataDave1 / query06_10things.sql
Created November 23, 2020 22:10
10 Things Every Snowflake Admin Should be Doing to Optimize Credits Query 06 – Warehouses Approaching Cloud Service Billing Threshold
WITH
cloudServices AS (SELECT
WAREHOUSE_NAME, MONTH(START_TIME) AS MONTH,
SUM(CREDITS_USED_CLOUD_SERVICES) AS CLOUD_SERVICES_CREDITS,
COUNT(*) AS NO_QUERYS
FROM "SNOWFLAKE"."ACCOUNT_USAGE"."QUERY_HISTORY"
GROUP BY WAREHOUSE_NAME,MONTH
ORDER BY WAREHOUSE_NAME,NO_QUERYS DESC),
warehouseMetering AS (SELECT
WAREHOUSE_NAME, MONTH(START_TIME) AS MONTH,
@BigDataDave1
BigDataDave1 / query05_10things.sql
Created November 23, 2020 22:09
10 Things Every Snowflake Admin Should be Doing to Optimize Credits Query 05 – Warehouse Credit Usage Greater Than 7 Day Average
SELECT WAREHOUSE_NAME, DATE(START_TIME) AS DATE,
SUM(CREDITS_USED) AS CREDITS_USED,
AVG(SUM(CREDITS_USED)) OVER (PARTITION BY WAREHOUSE_NAME ORDER BY DATE ROWS 7 PRECEDING) AS CREDITS_USED_7_DAY_AVG,
(TO_NUMERIC(SUM(CREDITS_USED)/CREDITS_USED_7_DAY_AVG*100,10,2)-100)::STRING || '%' AS VARIANCE_TO_7_DAY_AVERAGE
FROM "SNOWFLAKE"."ACCOUNT_USAGE"."WAREHOUSE_METERING_HISTORY"
GROUP BY DATE, WAREHOUSE_NAME
ORDER BY DATE DESC;