Skip to content

Instantly share code, notes, and snippets.

View tuanchris's full-sized avatar

Tuan Nguyen tuanchris

View GitHub Profile
[sqlfluff]
verbose = 0
nocolor = False
dialect = bigquery
templater = dbt
rules = None
exclude_rules = L034, L016, L044, L029,
recurse = 0
output_line_length = 100
runaway_limit = 10
# Transform, load, and check fact data
create_immigration_data = BigQueryOperator(
task_id = 'create_immigration_data',
use_legacy_sql = False,
params = {
'project_id': project_id,
'staging_dataset': staging_dataset,
'dwh_dataset': dwh_dataset
},
sql = './sql/F_IMMIGRATION_DATA.sql'
@tuanchris
tuanchris / fashionly_new_customers_favorite_category.sql
Last active September 2, 2022 03:34
fashionly_new_customers_favorite_category
WITH
first_orders AS (
SELECT
o.order_id,
o.user_id,
ROW_NUMBER() OVER(PARTITION BY o.user_id ORDER BY o.created_at ASC) order_sequence
FROM
`bigquery-public-data.thelook_ecommerce.orders` o
WHERE
o.status NOT IN ('Cancelled',
@tuanchris
tuanchris / fashionly_basic_query.sql
Last active September 2, 2022 02:54
fashionly_basic_query
SELECT
DATE(o.created_at) AS order_created_date,
SUM(oi.sale_price) AS revenue,
COUNT(DISTINCT o.order_id) AS order_count,
COUNT(DISTINCT o.user_id) AS customers_orderd
FROM
`bigquery-public-data.thelook_ecommerce.orders` o
LEFT JOIN
`bigquery-public-data.thelook_ecommerce.order_items` oi
ON
- name: Lint with sqlfluff
run: |
pip install sqlfluff
sqlfluff lint
# Linting only modified models
# git fetch origin main:main
# git diff main --name-only --diff-filter=d | egrep '^models/.*sql$$' | xargs -r sqlfluff lint
name: scheduled_run
on:
schedule:
- cron: '0 5,17 * * *'
env:
DBT_PROFILES_DIR: ./
DBT_GOOGLE_PROJECT_PROD: ${{ secrets.DBT_GOOGLE_PROJECT_PROD }}
DBT_GOOGLE_BIGQUERY_DATASET_PROD: ${{ secrets.DBT_GOOGLE_BIGQUERY_DATASET_PROD }}
name: pr_to_main
on:
pull_request:
branches:
- main
env:
DBT_PROFILES_DIR: ./
your_dbt_project: # Be sure to change this to your dbt project name
target: dev
outputs:
dev:
type: bigquery
method: service-account
keyfile: "{{ env_var('DBT_GOOGLE_BIGQUERY_KEYFILE_DEV') }}"
project: "{{ env_var('DBT_GOOGLE_PROJECT_DEV') }}"
dataset: "{{ env_var('DBT_GOOGLE_BIGQUERY_DATASET_DEV') }}"
threads: 32
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'dbt_data_model'
version: '1.0.0'
config-version: 2
# This setting configures which "profile" dbt uses for this project.
profile: 'dbt_data_model'
@tuanchris
tuanchris / gcs.tf
Created August 15, 2020 09:18
cloud-iac-6
# GCS resources
resource "google_storage_bucket" "gcs-data-lake-landing" {
name = "${google_project.data-lake.project_id}-landing"
project = google_project.data-lake.project_id
location = local.region
force_destroy = true
storage_class = "STANDARD"
}
resource "google_storage_bucket" "gcs-data-lake-sensitive" {