Skip to content

Instantly share code, notes, and snippets.

@randypitcherii
Last active November 16, 2022 11:31
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save randypitcherii/720dc5003c8a30c217ebd641b5bfc578 to your computer and use it in GitHub Desktop.
Save randypitcherii/720dc5003c8a30c217ebd641b5bfc578 to your computer and use it in GitHub Desktop.
SQL commands for Hashmap's slightly-modified Zero to Snowflake demonstrations. Find the original, official SQL from Snowflake at https://bit.ly/2JJZl3J
// This SQL file is for the Hands On Lab Guide for the 30-day free Snowflake trial account
// The numbers below correspond to the sections of the Lab Guide in which SQL is to be run in a Snowflake worksheet
// Modules 1 and 2 of the Lab Guide have no SQL to be run
// See the lab guide here - https://s3.amazonaws.com/snowflake-workshop-lab/InpersonZTS_LabGuide.pdf
//=====================================
// MODULE 1
// Steps to Prepare Your Lab Environment
//=====================================
/**
* If not yet done, register for a Snowflake free 30-day
* trial at https://trial.snowflake.com
*
* That's it!
*/
//=====================================
//=====================================
// MODULE 2
// The Snowflake UI & Lab Story
//=====================================
/**
* 1. Log in to your trial Snowflake instance (check your email for login details)
* 2. Close any welcome boxes or tutorials
* 3. UI Tour
* - Contexts - Warehouses
* - Databases - Worksheets
* - Shares - History
*
* Lab Story:
* This Snowflake lab will be done as part of a theoretical real-world “story”
* to help you better understand why we are performing the steps in this lab and
* in the order they appear.
*
* The “story” of this lab is based on the analytics team at Citi Bike, a real, citywide bike
* share system in New York City, USA. This team wants to be able to run analytics on
* data to better understand their riders and how to serve them best.
*
* We will first load structured .csv data from rider transactions into Snowflake.
* This comes from Citi Bike internal transactional systems. Then later we will
* load open-source, semi-structured JSON weather data into Snowflake to see if there is
* any correlation between the number of bike rides and weather.
*
*/
//=====================================
//=====================================
// MODULE 3
// Preparing to Load Data
//=====================================
// 3.1.1 | Done in UI
USE ROLE SYSADMIN;
CREATE DATABASE CITIBIKE;
// 3.1.3 | Done in UI
USE DATABASE CITIBIKE;
USE SCHEMA CITIBIKE.PUBLIC;
// 3.1.4
CREATE OR REPLACE TABLE
CITIBIKE.PUBLIC.TRIPS (
TRIPDURATION INTEGER,
STARTTIME TIMESTAMP,
STOPTIME TIMESTAMP,
START_STATION_ID INTEGER,
START_STATION_NAME STRING,
START_STATION_LATITUDE FLOAT,
START_STATION_LONGITUDE FLOAT,
END_STATION_ID INTEGER,
END_STATION_NAME STRING,
END_STATION_LATITUDE FLOAT,
END_STATION_LONGITUDE FLOAT,
BIKEID INTEGER,
MEMBERSHIP_TYPE STRING,
USERTYPE STRING,
BIRTH_YEAR INTEGER,
GENDER INTEGER
);
// 3.2.3 | Done in UI
CREATE OR REPLACE STAGE
CITIBIKE.PUBLIC.CITIBIKE_TRIPS
URL='s3://snowflake-workshop-lab/citibike-trips';
// 3.2.4
LIST @CITIBIKE_TRIPS;
// 3.3.2 | Done in UI
CREATE OR REPLACE FILE FORMAT
CITIBIKE.PUBLIC.CSV
TYPE = 'CSV'
FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
NULL_IF = ('NULL','null','')
ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE;
//=====================================
//=====================================
// MODULE 4
// Loading Data
//=====================================
// 4.1.1 | Done in UI
CREATE OR REPLACE WAREHOUSE
COMPUTE_WH
WAREHOUSE_SIZE=SMALL
AUTO_SUSPEND=60;
// 4.2.1 | Done in UI
USE ROLE SYSADMIN;
USE WAREHOUSE COMPUTE_WH;
USE DATABASE CITIBIKE;
USE SCHEMA CITIBIKE.PUBLIC;
// 4.2.2
COPY INTO
CITIBIKE.PUBLIC.TRIPS
FROM
@CITIBIKE.PUBLIC.CITIBIKE_TRIPS
FILE_FORMAT=CITIBIKE.PUBLIC.CSV
ON_ERROR=CONTINUE;
// 4.2.5
TRUNCATE TABLE CITIBIKE.PUBLIC.TRIPS;
// 4.2.6 | Done in UI
ALTER WAREHOUSE
COMPUTE_WH
SET
WAREHOUSE_SIZE=LARGE;
// 4.2.7
COPY INTO
CITIBIKE.PUBLIC.TRIPS
FROM
@CITIBIKE.PUBLIC.CITIBIKE_TRIPS
FILE_FORMAT=CITIBIKE.PUBLIC.CSV
ON_ERROR=CONTINUE;
// 4.3.1 | Done in UI
CREATE WAREHOUSE IF NOT EXISTS
ANALYTICS_WH
WAREHOUSE_SIZE=LARGE
AUTO_SUSPEND=60;
//=====================================
//=====================================
// MODULE 5
// Analytical Queries, Result Cache, Cloning
//=====================================
// 5.1.1 | Done in UI
USE ROLE SYSADMIN;
USE WAREHOUSE ANALYTICS_WH;
USE DATABASE CITIBIKE;
USE SCHEMA CITIBIKE.PUBLIC;
// 5.1.2
SELECT * FROM CITIBIKE.PUBLIC.TRIPS LIMIT 20;
// 5.1.3
SELECT
DATE_TRUNC('hour', STARTTIME) AS DATE,
COUNT(*) AS NUM_TRIPS,
AVG(TRIPDURATION)/60 AS AVG_DURATION_IN_MINUTES,
AVG(
HAVERSINE(
START_STATION_LATITUDE,
START_STATION_LONGITUDE,
END_STATION_LATITUDE,
END_STATION_LONGITUDE
)
) AS AVG_DISTANCE_IN_KILOMETERS
FROM
CITIBIKE.PUBLIC.TRIPS
GROUP BY
DATE
ORDER BY
DATE;
// 5.1.4
SELECT
DATE_TRUNC('hour', STARTTIME) AS DATE,
COUNT(*) AS NUM_TRIPS,
AVG(TRIPDURATION)/60 AS AVG_DURATION_IN_MINUTES,
AVG(
HAVERSINE(
START_STATION_LATITUDE,
START_STATION_LONGITUDE,
END_STATION_LATITUDE,
END_STATION_LONGITUDE
)
) AS AVG_DISTANCE_IN_KILOMETERS
FROM
CITIBIKE.PUBLIC.TRIPS
GROUP BY
DATE
ORDER BY
DATE;
// 5.1.5
SELECT
DAYNAME(STARTTIME) AS WEEKDAY,
COUNT(*) AS NUM_TRIPS
FROM
CITIBIKE.PUBLIC.TRIPS
GROUP BY
WEEKDAY
ORDER BY
NUM_TRIPS DESC;
// 5.2.1
CREATE TABLE
CITIBIKE.PUBLIC.TRIPS_DEV
CLONE
CITIBIKE.PUBLIC.TRIPS;
//=====================================
//=====================================
// MODULE 6
// Working with Semi-Structured Data,
// Views, and JOINs
//=====================================
// 6.1.1
CREATE DATABASE WEATHER;
// 6.1.2
USE ROLE SYSADMIN;
USE WAREHOUSE COMPUTE_WH;
USE DATABASE WEATHER;
USE SCHEMA PUBLIC;
// 6.1.3
CREATE TABLE
WEATHER.PUBLIC.JSON_WEATHER_DATA(
V VARIANT
);
// 6.2.1
CREATE STAGE
WEATHER.PUBLIC.NYC_WEATHER
URL='s3://snowflake-workshop-lab/weather-nyc';
// 6.2.2
LIST @WEATHER.PUBLIC.NYC_WEATHER;
// 6.3.1
COPY INTO
WEATHER.PUBLIC.JSON_WEATHER_DATA
FROM
@WEATHER.PUBLIC.NYC_WEATHER
FILE_FORMAT=(
TYPE=JSON
);
// 6.3.2
SELECT * FROM WEATHER.PUBLIC.JSON_WEATHER_DATA LIMIT 10;
// 6.4.1
CREATE VIEW
WEATHER.PUBLIC.JSON_WEATHER_DATA_VIEW
AS (
SELECT
V:"time"::TIMESTAMP AS OBSERVATION_TIME,
V:"city"."id"::INT AS CITY_ID,
V:"city"."name"::STRING AS CITY_NAME,
V:"city"."country"::STRING AS COUNTRY,
V:"city"."coord"."lat"::FLOAT AS CITY_LAT,
V:"city"."coord"."lon"::FLOAT AS CITY_LON,
V:"clouds"."all"::INT AS CLOUDS,
(V:"main"."temp"::FLOAT) - 273.15 AS TEMP_AVG,
(V:"main"."temp_min"::FLOAT) - 273.15 AS TEMP_MIN,
(V:"main"."temp_max"::FLOAT) - 273.15 AS TEMP_MAX,
V:"weather"[0]."main"::STRING AS WEATHER,
V:"weather"[0]."description"::STRING AS WEATHER_DESC,
V:"weather"[0]."icon"::STRING AS WEATHER_ICON,
V:"wind"."deg"::FLOAT AS WIND_DIR,
V:"wind"."speed"::FLOAT AS WIND_SPEED
FROM
WEATHER.PUBLIC.JSON_WEATHER_DATA
WHERE
CITY_ID = 5128638
);
// 6.4.4
SELECT
*
FROM
WEATHER.PUBLIC.JSON_WEATHER_DATA_VIEW
WHERE
DATE_TRUNC('month', OBSERVATION_TIME) = '2018-01-01'
LIMIT 20;
// 6.5.1
SELECT
WEATHER AS CONDITIONS,
COUNT(*) AS NUM_TRIPS
FROM (
CITIBIKE.PUBLIC.TRIPS
LEFT OUTER JOIN
JSON_WEATHER_DATA_VIEW
ON
DATE_TRUNC('HOUR', OBSERVATION_TIME) = DATE_TRUNC('HOUR', STARTTIME)
)
WHERE
CONDITIONS IS NOT NULL
GROUP BY
WEATHER
ORDER BY
NUM_TRIPS DESC;
//=====================================
//=====================================
// MODULE 7
// Using Time Travel
//=====================================
// 7.1.1
DROP TABLE WEATHER.PUBLIC.JSON_WEATHER_DATA;
// 7.1.2
SELECT * FROM JSON_WEATHER_DATA LIMIT 10;
// 7.1.3
UNDROP TABLE WEATHER.PUBLIC.JSON_WEATHER_DATA;
// 7.2.1
USE ROLE SYSADMIN;
USE WAREHOUSE COMPUTE_WH;
USE DATABASE CITIBIKE;
USE SCHEMA PUBLIC;
// 7.2.2
UPDATE
CITIBIKE.PUBLIC.TRIPS
SET
START_STATION_NAME = 'oops';
// 7.2.3
SELECT
START_STATION_NAME AS STATION,
COUNT(*) AS RIDES
FROM
CITIBIKE.PUBLIC.TRIPS
GROUP BY
STATION
ORDER BY
RIDES DESC
LIMIT 20;
// 7.2.4
SET QUERY_ID = (
SELECT
QUERY_ID
FROM
TABLE(CITIBIKE.INFORMATION_SCHEMA.QUERY_HISTORY_BY_SESSION (RESULT_LIMIT=>5))
WHERE
QUERY_TEXT LIKE 'UPDATE%'
ORDER BY
START_TIME
LIMIT 1
);
SELECT $QUERY_ID; // Check your query id
// 7.2.5
CREATE OR REPLACE TABLE
CITIBIKE.PUBLIC.TRIPS
AS (
SELECT
*
FROM
CITIBIKE.PUBLIC.TRIPS
BEFORE
(STATEMENT => $QUERY_ID)
);
// 7.2.6
SELECT
START_STATION_NAME AS STATION,
COUNT(*) AS RIDES
FROM
CITIBIKE.PUBLIC.TRIPS
GROUP BY
STATION
ORDER BY
RIDES DESC
LIMIT 20;
//=====================================
//=====================================
// MODULE 8
// Roles-Based Access Control (RBAC)
// and Account Administration
//=====================================
// 8.1.1
USE ROLE SECURITYADMIN;
// 8.1.3 (NOTE - enter your unique user name into the second row below)
CREATE ROLE JUNIOR_DBA;
GRANT ROLE JUNIOR_DBA TO USER <YOUR_USER_NAME_GOES_HERE>;
// 8.1.4
USE ROLE JUNIOR_DBA;
// 8.1.6
USE ROLE SECURITYADMIN;
GRANT USAGE ON DATABASE CITIBIKE TO ROLE JUNIOR_DBA;
GRANT USAGE ON DATABASE WEATHER TO ROLE JUNIOR_DBA;
// 8.1.7
USE ROLE JUNIOR_DBA;
//=====================================
//=====================================
// MODULE 9
// Data Sharing
//=====================================
/**
* Snowflake enables account-to-account sharing of data through shares, which are
* created by data providers and “imported” by data consumers, either through
* their own Snowflake account or a provisioned Snowflake Reader account.
* The consumer could be an external entity/partner, or a different internal
* business unit which is required to have its own, unique Snowflake account.
*
* Return to the Lab Guide PDF at:
* https://s3.amazonaws.com/snowflake-workshop-lab/InpersonZTS_LabGuide.pdf
* and go to page 51.
*
* We will wrap up this lab with an introduction to data sharing in
* the Snowflake web UI.
*/
//=====================================
//=====================================
// OPTIONAL - Cleanup
//=====================================
USE ROLE ACCOUNTADMIN;
DROP SHARE IF EXISTS TRIPS_SHARE;
DROP DATABASE IF EXISTS CITIBIKE;
DROP DATABASE IF EXISTS WEATHER;
DROP WAREHOUSE IF EXISTS ANALYTICS_WH;
DROP WAREHOUSE IF EXISTS COMPUTE_WH;
DROP ROLE IF EXISTS JUNIOR_DBA;
//=====================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment