Skip to content

Instantly share code, notes, and snippets.

View alcheng10's full-sized avatar

Albert Cheng alcheng10

View GitHub Profile

Date Islands Example

# Customer Contract and Status Dimension Table
CREATE TABLE Dim_Cust_Contract_Status AS (
SELECT 
    ROW_NUMBER() OVER (ORDER BY K.Customer_ID) AS Customer_Contract_Status_SK,
    K.Customer_ID,
    K.Contract_GUID,
    K.Contract_Start_Date,
@alcheng10
alcheng10 / Date_Dimension.md
Last active July 30, 2023 19:54
Python (Pandas) function that creates a dimension date with workday flag
import pandas as pd
import holidays

def create_dim_date(start_date, end_date):
    '''
    Create Dimension Date in Pandas
    
    :return df_date : DataFrame
    '''
# With your date dimension in df_date, join up and filter
data = df_date.join(data.set_index('Date'), how='left')

# Show all empty rows after the LEFT JOIN
data[data.isnull().any(axis=1)]
@alcheng10
alcheng10 / Daily_Date_Islands_Gap.md
Created March 11, 2020 10:47
Daily Date Island Gaps Example

Daily Date Island Gaps Example

SELECT
  dt.FullDate
  ,c.RegistrationGUID
  ,c.RegistrationNo
  ,c.FromDate AS CarRegistrationFromDate
  ,c.ToDate AS CarRegistrationToDate
@alcheng10
alcheng10 / Date_Daily_Dim_Table.md
Created March 11, 2020 10:52
Date Daily Dim Table Example
DROP TABLE IF EXISTS DimCustomer_Car_Status;

CREATE TABLE DimCustomer_Car_Status AS
WITH Joined_Data AS (
SELECT 
  o.RegistrationGUID
  ,o.RegistrationNo
  ,o.OwnerName
@alcheng10
alcheng10 / Date_Daily_Island_with_Rank.md
Created March 11, 2020 10:59
Date_Daily_Island_with_Rank example
# Example with Dense Rank!
WITH RANKED_DATA AS (
SELECT 
  FullDate
  ,RegistrationGUID
  ,RegistrationNo
  ,PolicyNo
  ,Cover_YN
  ,DENSE_RANK () OVER (
@alcheng10
alcheng10 / Date_Daily_Island_with_Rank_with_group.md
Created March 11, 2020 11:04
Date_Daily_Island_with_Rank_with_group
SELECT
  RegistrationGUID
  ,RegistrationNo
  ,PolicyNo
  ,MIN(FullDate) AS StartDate
  ,MAX(FullDate) AS EndDate
  ,Cover_YN
  ,COUNT(FullDate) AS NumberDaysCovered
@alcheng10
alcheng10 / AWS_Best_Practices.md
Last active November 29, 2022 06:22
AWS Serverless (Lambda, S3, SQS, API Gateway) Best Practices Checklist

AWS Best Practices Checklist - Serverless

This checklist uses markdown formatting and therefore can be easily incorporated into a git repository's README.md.

General

  • Tagging of stack and all resources - e.g. cost tags, microservices, projects
  • AWS resources are created via Infrastructure-as-Code (Terraform, Serverless.com, CDK, Pulumi, SAM, etc.)
  • Environment variables are passed during deployment (including Stage of stack, such as DEV, PROD, etc.)
  • RBAC and Least-Privilege Principle applied - IAM roles limited to only what is needed
@alcheng10
alcheng10 / gcp_bigquery_example.md
Last active May 6, 2020 08:39
GCP BigQuery SDK Example
from google.cloud import bigquery
from google.oauth2 import service_account
location = 'australia-southeast1'
project = 'project123'

client = bigquery.Client(project=project, location=location)


def upload_df_to_BQ(df, table_name, mode='append'):
@alcheng10
alcheng10 / ABS_RRPI_Cumulative_Growth.md
Last active May 9, 2020 06:27
Cumulative Growth of Australian Residential Property Prices

BigQuery SQL Script to get the rolling and cumulative percentage change for the Average Housing Price by State

WITH PRICES AS (
SELECT 
  Date
  ,State
  ,Residential_Dwelling_Average_Value_AUD AS AVERAGE_HOUSING_PRICE
  ,LAG(Residential_Dwelling_Average_Value_AUD) OVER (PARTITION BY State ORDER BY Date ASC) AS LAG_AVERAGE_HOUSING_PRICE
  ,ROW_NUMBER() OVER (PARTITION BY State ORDER BY Date ASC) AS RNK
FROM transformation.ABS_DWELLING_DATA_TRF