Skip to content

Instantly share code, notes, and snippets.

View monimiller's full-sized avatar

Monica Miller monimiller

View GitHub Profile
@monimiller
monimiller / ETL.sql
Created July 24, 2024 11:43
ETL Mythbusters SQL
CREATE SCHEMA blue_bikes.<yourname>;
CREATE TABLE bb_bronze_temp_trips (
trip_seconds varchar,
start_time varchar,
stop_time varchar,
start_station_id varchar,
start_station_name varchar,
start_station_latitude varchar,
start_station_longitude varchar,
CREATE TABLE
orders (
event_id INTEGER,
created_ts TIMESTAMP,
metric INTEGER,
year VARCHAR,
month VARCHAR,
day VARCHAR
);
CREATE TABLE enigma_jhu (
   fips VARCHAR,
   admin2 VARCHAR,
   province_state VARCHAR,
   country_region VARCHAR,
   last_update VARCHAR,
   latitude DOUBLE,
   longitude DOUBLE,
 confirmed INTEGER,
create schema aws_covid_data_lake with (location='s3://dbt-aws-<username>/');
ALTER TABLE <GALAXY_DESTINATION_TABLE_NAME>
    DROP COLUMN <NEW_COLUMN_NAME>;
    
-- Example:
ALTER TABLE test_table
    DROP COLUMN new_column;
ALTER TABLE <GALAXY_DESTINATION_TABLE_NAME>
    ADD COLUMN <NEW_COLUMN_NAME> <NEW_COLUMN_TYPE>;
    
-- Example:
ALTER TABLE test_table
    ADD COLUMN new_column VARCHAR;
DELETE FROM <GALAXY_DESTINATION_TABLE_NAME>
    WHERE <TIMESTAMP_PARTITIONING_COLUMN> < date_trunc('day', now() AT TIME ZONE <TIMEZONE> - <SQL_TIME_INTERVAL>);

-- Daily Example:
DELETE FROM test_table
    WHERE timestamp < date_trunc('day', now() AT TIME ZONE 'UTC' - INTERVAL '30' DAY);
ALTER TABLE <GALAXY_DESTINATION_TABLE_NAME>
   EXECUTE expire_snapshots(retention_threshold => '7d');

-- Daily Example:
ALTER TABLE test_table
   EXECUTE expire_snapshots(retention_threshold => '7d');
ALTER TABLE <GALAXY_DESTINATION_TABLE_NAME>
    EXECUTE OPTIMIZE
    WHERE CAST(<TIMESTAMP_PARTITIONING_COLUMN> AS DATE) >= CAST(now() - <SQL_TIME_INTERVAL> AS DATE);

-- Daily Example:
ALTER TABLE test_table
   EXECUTE OPTIMIZE
   WHERE CAST(timestamp AS DATE) >= CAST(now() - INTERVAL '2' DAY AS DATE);
CREATE TABLE events ( 
  id number, 
  day_id varchar, 
  other columns … ) 
WITH ( 
  type = ‘delta’, 
  partitioned_by = array[’day_id’] 
);