Skip to content

Instantly share code, notes, and snippets.

@robkisk
Last active February 6, 2025 13:12
Show Gist options
  • Save robkisk/cda4c1dbc48d67d82ca3b2c0457a6984 to your computer and use it in GitHub Desktop.
Save robkisk/cda4c1dbc48d67d82ca3b2c0457a6984 to your computer and use it in GitHub Desktop.
DABs Examples
resources:
jobs:
<some-unique-programmatic-identifier-for-this-job>:
# ...
job_clusters:
- job_cluster_key: <some-unique-programmatic-identifier-for-this-key>
new_cluster:
# Cluster settings.
targets:
<some-unique-programmatic-identifier-for-this-target>:
resources:
jobs:
<the-matching-programmatic-identifier-for-this-job>:
# ...
job_clusters:
- job_cluster_key: <the-matching-programmatic-identifier-for-this-key>
# Any more cluster settings to join with the settings from the
# resources mapping for the matching top-level job_cluster_key.
resources:
jobs:
my-dbt-job:
name: my-dbt-job
tasks:
- task_key: my-dbt-task
dbt_task:
commands:
- "dbt deps"
- "dbt seed"
- "dbt run"
project_directory: /Users/someone@example.com/Testing
warehouse_id: 1a111111a1111aa1
libraries:
- pypi:
package: "dbt-databricks>=1.0.0,<2.0.0"
resources:
jobs:
my_job:
name: my_job
tasks:
- task_key: generate_countries_list
notebook_task:
notebook_path: ../src/generate_countries_list.ipnyb
- task_key: process_countries
depends_on:
- task_key: generate_countries_list
for_each_task:
inputs: "{{tasks.generate_countries_list.values.countries}}"
task:
task_key: process_countries_iteration
notebook_task:
notebook_path: ../src/process_countries_notebook.ipnyb
resources:
jobs:
my-jar-job:
name: my-jar-job
tasks:
- task_key: my-jar-task
spark_jar_task:
main_class_name: org.example.com.Main
libraries:
- jar: /Volumes/main/default/my-volume/my-project-0.1.0-SNAPSHOT.jar
# ...
resources:
jobs:
my-job:
name: my-job
job_clusters:
- job_cluster_key: my-cluster
new_cluster:
spark_version: 13.3.x-scala2.12
node_type_id: i3.xlarge
num_workers: 1
targets:
development:
resources:
jobs:
my-job:
name: my-job
job_clusters:
- job_cluster_key: my-cluster
new_cluster:
spark_version: 12.2.x-scala2.12
num_workers: 2
# ...
# ...
resources:
jobs:
my-job:
name: my-job
job_clusters:
- job_cluster_key: my-cluster
new_cluster:
spark_version: 13.3.x-scala2.12
targets:
development:
resources:
jobs:
my-job:
name: my-job
job_clusters:
- job_cluster_key: my-cluster
new_cluster:
node_type_id: i3.xlarge
num_workers: 1
# ...
resources:
jobs:
my-notebook-job:
name: my-notebook-job
tasks:
- task_key: my-notebook-task
notebook_task:
notebook_path: ./my-notebook.ipynb
parameters:
- name: my_job_run_id
default: "{{job.run_id}}"
# ...
resources:
pipelines:
<some-unique-programmatic-identifier-for-this-pipeline>:
# ...
clusters:
- label: default | maintenance
# Cluster settings.
targets:
<some-unique-programmatic-identifier-for-this-target>:
resources:
pipelines:
<the-matching-programmatic-identifier-for-this-pipeline>:
# ...
clusters:
- label: default | maintenance
# Any more cluster settings to join with the settings from the
# resources mapping for the matching top-level label.
# ...
# ...
resources:
pipelines:
my-pipeline:
clusters:
- label: default
node_type_id: i3.xlarge
targets:
development:
resources:
pipelines:
my-pipeline:
clusters:
- label: default
num_workers: 1
# ...
# ...
resources:
pipelines:
my-pipeline:
clusters:
- label: default
node_type_id: i3.xlarge
num_workers: 1
targets:
development:
resources:
pipelines:
my-pipeline:
clusters:
- label: default
num_workers: 2
# ...
resources:
jobs:
my-pipeline-job:
name: my-pipeline-job
tasks:
- task_key: my-pipeline-task
pipeline_task:
pipeline_id: 11111111-1111-1111-1111-111111111111
resources:
jobs:
my-python-script-job:
name: my-python-script-job
tasks:
- task_key: my-python-script-task
spark_python_task:
python_file: ./my-script.py
resources:
jobs:
my-sql-file-job:
name: my-sql-file-job
tasks:
- task_key: my-sql-file-task
sql_task:
file:
path: /Users/someone@example.com/hello-world.sql
source: WORKSPACE
warehouse_id: 1a111111a1111aa1
resources:
jobs:
my-job:
name: my-job
tasks:
- task_key: condition_task
condition_task:
op: LESS_THAN
left: "{{job.repair_count}}"
right: "5"
- task_key: notebook_task
depends_on:
- task_key: condition_task
outcome: "true"
notebook_task:
notebook_path: ../src/notebook.ipynb
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment