First run this in snowflake:
(
src variant
)
AS SELECT parse_json(column1) as src
FROM values
('{
"topleveldate" : "2017-04-28",
import modin.pandas as pd | |
import snowflake.snowpark.modin.plugin | |
from prophet import Prophet | |
def model( dbt, session ): | |
dbt.config( | |
materialized="table", # the incremental materialization is also supported | |
packages=['pandas==2.2.1','modin==0.28.1','Prophet','holidays==0.18','snowflake-snowpark-python[modin]'], # how to import python libraries in dbt's context | |
python_version="3.11" |
import enum | |
import os | |
import time | |
# Be sure to `pip install requests` in your python environment | |
import requests | |
ACCOUNT_ID = 39 | |
JOB_ID = 302 |
{% materialization incremental_lastrun_auto, adapter='snowflake' -%} | |
{% set original_query_tag = set_query_tag() %} | |
{%- set unique_key = config.get('unique_key') -%} | |
{%- set full_refresh_mode = (should_full_refresh()) -%} | |
{% set target_relation = this %} | |
{% set existing_relation = load_relation(this) %} | |
{% set tmp_relation = make_temp_relation(this) %} |
#example from https://towardsdatascience.com/reduce-warehouse-space-with-the-pareto-principle-using-python-e722a6babe0e | |
#optimizing warehouse location based on sku frequency using pareto 80/20 principle | |
import pandas as pd | |
def model(dbt, session): | |
dbt.config( | |
materialized="table", | |
packages=["pandas"] #https://repo.anaconda.com/pkgs/snowflake/ |
{% macro add_search_optimization(relation,columns) %} | |
{# check if the relation has search optimization added #} | |
{%- call statement('search_optimization', fetch_result=True) -%} | |
describe search optimization on {{ relation }} | |
{%- endcall %} | |
{% if execute %} | |
{% set result = load_result('search_optimization') %} | |
{%- set result_data = result['data'] -%} |
=== | |
Compiled SQL: | |
select | |
EXTRACT(YEAR from ship_date), | |
sum(case when ship_mode = 'FOB' then gross_item_sales_amount end) as FOB_A_amount, | |
sum(case when ship_mode = 'TRUCK' then gross_item_sales_amount end) as TRUCK_N_amount, | |
sum(case when ship_mode = 'MAIL' then gross_item_sales_amount end) as MAIL_N_amount, | |
sum(case when ship_mode = 'SHIP' then gross_item_sales_amount end) as SHIP_R_amount, |
/* Create a pivot table with dynamic columns based on the ship modes that are in the system */ | |
{%- call statement('result', fetch_result=True) -%} | |
{# this pulls the unique ship modes from the fct_order_items table #} | |
select ship_mode from {{ ref('fct_order_items') }} group by 1 | |
{%- endcall %} | |
{% set ship_modes = load_result('result').table.columns[0].values() %} |
First run this in snowflake:
(
src variant
)
AS SELECT parse_json(column1) as src
FROM values
('{
"topleveldate" : "2017-04-28",
models: | |
- name: dim_customers | |
description: Customer dimensions table | |
columns: | |
- name: customer_key | |
description: Primary key on the customers table | |
tests: | |
- unique | |
- not_null | |
- name: region |