Evan Calzolaio Ecalzo

## dev_limit.sql
{% macro dev_limit(sample_size=1000) -%}
    {#-
        Row-sampling helper for non-production runs.

        Renders `SAMPLE (<sample_size> ROWS)` for every target except 'prod',
        and renders nothing for 'prod' so the full data set is read.

        sample_size: number of rows to request in the SAMPLE clause
                     (default 1000).

        The notes live in a Jinja comment rather than a SQL `--` comment on
        purpose: whitespace control strips the newline after the rendered
        text, so a `--` comment in the prod branch would swallow any SQL that
        follows the macro call on the same line.
    -#}
    {%- if target.name != 'prod' -%}
    SAMPLE ({{ sample_size }} ROWS)
    {%- endif -%}
{%- endmacro %}

## dbt.py
def run_dbt_job(
    account_id: str,
    job_id: str,
    branch: str = None,
    schema_override: str = None,
    steps_override: List[str] = None,
    env_name: str = "staging",
):
    """
    Run a DBT job.

## trigger_dbt.py
import logging
import time
from datetime import datetime, timedelta

from airflow.macros.bieng import get_env # internal function to detect current env
from airflow.models import DAG
from airflow.operators.python_operator import PythonOperator
from datatools.apis.dbt import get_run_status, run_dbt_job  # internal package w/ API functions

default_args = {

## Jenkinsfile
pipeline {
    agent {
        kubernetes {
            cloud 'jenkins-k8-cloud'
            yaml """
apiVersion: v1
kind: Pod
spec:
  containers:
  - name: bieng-dbt-checks-container

## generate_schema_name.sql
{% macro generate_schema_name(custom_schema_name, node) %}

    {# if this model is created in prod or staging, use the schema designated in dbt_project.yml #}
    {% if  target.name == 'prod' and custom_schema_name %}
        {{ custom_schema_name | trim | upper }}

    {% elif target.name == 'staging' and custom_schema_name %}
        {{ custom_schema_name | trim | upper }}

    {# else, if this is created in dev, use the default (user) schema #}

## Medium_SplinterTurorial.py
from splinter import Browser
import time

# we are going to use NYC Open Data as an example
URL = 'https://opendata.cityofnewyork.us/data/'

# un-comment this if you are using Windows!
# executable_path = {'executable_path': 'chromedriver.exe'}
# browser = Browser('chrome', **executable_path)
	{% macro dev_limit(sample_size=1000) -%}
	{#-
	Row-sampling helper for non-production runs.

	Renders `SAMPLE (<sample_size> ROWS)` for every target except 'prod',
	and renders nothing for 'prod' so the full data set is read.

	sample_size: number of rows to request in the SAMPLE clause (default 1000).

	The notes live in a Jinja comment rather than a SQL `--` comment on
	purpose: whitespace control strips the newline after the rendered text,
	so a `--` comment in the prod branch would swallow any SQL that follows
	the macro call on the same line.
	-#}
	{%- if target.name != 'prod' -%}
	SAMPLE ({{ sample_size }} ROWS)
	{%- endif -%}
	{%- endmacro %}
	def run_dbt_job(
	account_id: str,
	job_id: str,
	branch: str = None,
	schema_override: str = None,
	steps_override: List[str] = None,
	env_name: str = "staging",
	):
	"""
	Run a DBT job.
	import logging
	import time
	from datetime import datetime, timedelta

	from airflow.macros.bieng import get_env # internal function to detect current env
	from airflow.models import DAG
	from airflow.operators.python_operator import PythonOperator
	from datatools.apis.dbt import get_run_status, run_dbt_job # internal package w/ API functions

	default_args = {
	pipeline {
	agent {
	kubernetes {
	cloud 'jenkins-k8-cloud'
	yaml """
	apiVersion: v1
	kind: Pod
	spec:
	containers:
	- name: bieng-dbt-checks-container
	{% macro generate_schema_name(custom_schema_name, node) %}

	{# if this model is created in prod or staging, use the schema designated in dbt_project.yml #}
	{% if target.name == 'prod' and custom_schema_name %}
	{{ custom_schema_name | trim | upper }}

	{% elif target.name == 'staging' and custom_schema_name %}
	{{ custom_schema_name | trim | upper }}

	{# else, if this is created in dev, use the default (user) schema #}
	from splinter import Browser
	import time

	# we are going to use NYC Open Data as an example
	URL = 'https://opendata.cityofnewyork.us/data/'

	# un-comment this if you are using Windows!
	# executable_path = {'executable_path': 'chromedriver.exe'}
	# browser = Browser('chrome', **executable_path)