Skip to content

Instantly share code, notes, and snippets.

View tuanchris's full-sized avatar

Tuan Nguyen tuanchris

View GitHub Profile
@tuanchris
tuanchris / fashionly_new_customers_favorite_category.sql
Last active September 2, 2022 03:34
fashionly_new_customers_favorite_category
WITH
first_orders AS (
SELECT
o.order_id,
o.user_id,
ROW_NUMBER() OVER(PARTITION BY o.user_id ORDER BY o.created_at ASC) order_sequence
FROM
`bigquery-public-data.thelook_ecommerce.orders` o
WHERE
o.status NOT IN ('Cancelled',
@tuanchris
tuanchris / fashionly_basic_query.sql
Last active September 2, 2022 02:54
fashionly_basic_query
SELECT
DATE(o.created_at) AS order_created_date,
SUM(oi.sale_price) AS revenue,
COUNT(DISTINCT o.order_id) AS order_count,
COUNT(DISTINCT o.user_id) AS customers_orderd
FROM
`bigquery-public-data.thelook_ecommerce.orders` o
LEFT JOIN
`bigquery-public-data.thelook_ecommerce.order_items` oi
ON
[sqlfluff]
verbose = 0
nocolor = False
dialect = bigquery
templater = dbt
rules = None
exclude_rules = L034, L016, L044, L029,
recurse = 0
output_line_length = 100
runaway_limit = 10
- name: Lint with sqlfluff
run: |
pip install sqlfluff
sqlfluff lint
# Linting only modified models
# git fetch origin main:main
# git diff main --name-only --diff-filter=d | egrep '^models/.*sql$$' | xargs -r sqlfluff lint
name: pr_to_main
on:
pull_request:
branches:
- main
env:
DBT_PROFILES_DIR: ./
name: scheduled_run
on:
schedule:
- cron: '0 5,17 * * *'
env:
DBT_PROFILES_DIR: ./
DBT_GOOGLE_PROJECT_PROD: ${{ secrets.DBT_GOOGLE_PROJECT_PROD }}
DBT_GOOGLE_BIGQUERY_DATASET_PROD: ${{ secrets.DBT_GOOGLE_BIGQUERY_DATASET_PROD }}
your_dbt_project: # Be sure to change this to your dbt project name
target: dev
outputs:
dev:
type: bigquery
method: service-account
keyfile: "{{ env_var('DBT_GOOGLE_BIGQUERY_KEYFILE_DEV') }}"
project: "{{ env_var('DBT_GOOGLE_PROJECT_DEV') }}"
dataset: "{{ env_var('DBT_GOOGLE_BIGQUERY_DATASET_DEV') }}"
threads: 32
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'dbt_data_model'
version: '1.0.0'
config-version: 2
# This setting configures which "profile" dbt uses for this project.
profile: 'dbt_data_model'
@tuanchris
tuanchris / gcs.tf
Created August 15, 2020 09:18
cloud-iac-6
# GCS resources
resource "google_storage_bucket" "gcs-data-lake-landing" {
name = "${google_project.data-lake.project_id}-landing"
project = google_project.data-lake.project_id
location = local.region
force_destroy = true
storage_class = "STANDARD"
}
resource "google_storage_bucket" "gcs-data-lake-sensitive" {
@tuanchris
tuanchris / iam.tf
Created August 15, 2020 08:49
cloud-iac-5
# Sample IAM permission
module "project_iam_binding" {
source = "terraform-google-modules/iam/google//modules/projects_iam/"
projects = [google_project.data-lake.project_id]
mode = "additive"
bindings = {
# Bigquery
"roles/bigquery.admin" = ["user:${local.unique_id}-de@gmail.com"]
# GCS
"roles/storage.admin" = ["user:${local.unique_id}-de@gmail.com"]